X-Git-Url: http://git.scottworley.com/paperdoorknob/blobdiff_plain/7b4b681229f07e431e0945b41b956a28fa1258e4..refs/heads/main:/paperdoorknob.py diff --git a/paperdoorknob.py b/paperdoorknob.py index 1350784..5ad341f 100644 --- a/paperdoorknob.py +++ b/paperdoorknob.py @@ -4,33 +4,14 @@ # under the terms of the GNU General Public License as published by the # Free Software Foundation, version 3. -from typing import Any, Iterable - -from bs4 import BeautifulSoup -from bs4.element import Tag - from args import spec_from_commandline_args -from glowfic import chunkDOMs, flatURL, makeChunk, renderChunk +from glowfic import Thread from spec import Spec -def parse(content: bytes) -> BeautifulSoup: - return BeautifulSoup(content, 'html.parser') - - -def ilen(it: Iterable[Any]) -> int: - return sum(1 for _ in it) - - -def get_title(dom: BeautifulSoup) -> str | None: - span = dom.findChild("span", id="post-title") - if not isinstance(span, Tag): - return None - return span.text - - def process(spec: Spec) -> None: spec.texout.write(br'''\documentclass{article} +\usepackage{amssymb} \usepackage{booktabs} \usepackage{graphicx} \usepackage{longtable} @@ -52,21 +33,7 @@ def process(spec: Spec) -> None: \detokenize{#1}\relax\else #2\fi} %s ''' % (spec.icon_size, spec.layout)) - url = flatURL(spec.url) - spec.log('Fetching HTML...\r') - html = spec.fetcher.fetch(url) - spec.log('Parsing HTML...\r') - dom = parse(spec.htmlfilter(html)) - spec.log('Counting chunks...\r') - num_chunks = ilen(chunkDOMs(dom)) - title = get_title(dom) or "chunk" - for i, r in enumerate(chunkDOMs(dom)): - percent = 100.0 * i / num_chunks - spec.log(f'Processing {title} {i} of {num_chunks} ({percent:.1f}%)\r') - spec.domfilter(r) - chunk = makeChunk(r, spec.images) - spec.texout.write(spec.texfilter(renderChunk(spec.texifier, chunk))) - spec.log('') + Thread(spec).emit() spec.texout.write(b'\\end{document}\n')