X-Git-Url: http://git.scottworley.com/paperdoorknob/blobdiff_plain/940270992010ea5b4c912eb02f502923c0487a31..30b145cc4660ae5d86bd0de88e373d3107ba54d5:/paperdoorknob.py diff --git a/paperdoorknob.py b/paperdoorknob.py index ade0b88..5ad341f 100644 --- a/paperdoorknob.py +++ b/paperdoorknob.py @@ -4,33 +4,14 @@ # under the terms of the GNU General Public License as published by the # Free Software Foundation, version 3. -from typing import Any, Iterable - -from bs4 import BeautifulSoup -from bs4.element import Tag - from args import spec_from_commandline_args -from glowfic import flatURL, makeChunk, renderChunk, Thread +from glowfic import Thread from spec import Spec -def parse(content: bytes) -> BeautifulSoup: - return BeautifulSoup(content, 'html.parser') - - -def ilen(it: Iterable[Any]) -> int: - return sum(1 for _ in it) - - -def get_title(dom: BeautifulSoup) -> str | None: - span = dom.findChild("span", id="post-title") - if not isinstance(span, Tag): - return None - return span.text - - def process(spec: Spec) -> None: spec.texout.write(br'''\documentclass{article} +\usepackage{amssymb} \usepackage{booktabs} \usepackage{graphicx} \usepackage{longtable} @@ -52,22 +33,7 @@ def process(spec: Spec) -> None: \detokenize{#1}\relax\else #2\fi} %s ''' % (spec.icon_size, spec.layout)) - url = flatURL(spec.url) - spec.log('Fetching HTML...\r') - html = spec.fetcher.fetch(url) - spec.log('Parsing HTML...\r') - dom = parse(spec.htmlfilter(html)) - thread = Thread(dom) - spec.log('Counting chunks...\r') - num_chunks = ilen(thread.chunkDOMs()) - title = get_title(dom) or "chunk" - for i, r in enumerate(thread.chunkDOMs()): - percent = 100.0 * i / num_chunks - spec.log(f'Processing {title} {i} of {num_chunks} ({percent:.1f}%)\r') - spec.domfilter(r) - chunk = makeChunk(r, spec.images) - spec.texout.write(spec.texfilter(renderChunk(spec.texifier, chunk))) - spec.log('') + Thread(spec).emit() spec.texout.write(b'\\end{document}\n')