From: Scott Worley Date: Fri, 12 Jan 2024 03:30:21 +0000 (-0800) Subject: Optionally have Thread.__init__ fetch the HTML X-Git-Url: http://git.scottworley.com/paperdoorknob/commitdiff_plain/70adfbff16123a62d6b25f8ead23789d831d1d70?ds=inline;hp=21e82200b2887fc4f4f9eff4023f4a73547cacb5 Optionally have Thread.__init__ fetch the HTML --- diff --git a/glowfic.py b/glowfic.py index 92ae4ab..8029b5d 100644 --- a/glowfic.py +++ b/glowfic.py @@ -15,6 +15,7 @@ from bs4 import BeautifulSoup from bs4.element import Tag from images import ImageStore +from spec import Spec from texify import Texifier @@ -56,8 +57,15 @@ class Chunk: class Thread: - def __init__(self, dom: BeautifulSoup) -> None: - self._dom = dom + def __init__(self, thing: BeautifulSoup | Spec) -> None: + if isinstance(thing, Spec): + spec = thing + spec.log('Fetching HTML...\r') + html = spec.fetcher.fetch(flatURL(spec.url)) + spec.log('Parsing HTML...\r') + self._dom = BeautifulSoup(spec.htmlfilter(html), 'html.parser') + else: + self._dom = thing def title(self) -> str | None: span = self._dom.findChild("span", id="post-title") diff --git a/paperdoorknob.py b/paperdoorknob.py index cf50057..d545f61 100644 --- a/paperdoorknob.py +++ b/paperdoorknob.py @@ -6,17 +6,11 @@ from typing import Any, Iterable -from bs4 import BeautifulSoup - from args import spec_from_commandline_args -from glowfic import flatURL, makeChunk, renderChunk, Thread +from glowfic import makeChunk, renderChunk, Thread from spec import Spec -def parse(content: bytes) -> BeautifulSoup: - return BeautifulSoup(content, 'html.parser') - - def ilen(it: Iterable[Any]) -> int: return sum(1 for _ in it) @@ -44,12 +38,7 @@ def process(spec: Spec) -> None: \detokenize{#1}\relax\else #2\fi} %s ''' % (spec.icon_size, spec.layout)) - url = flatURL(spec.url) - spec.log('Fetching HTML...\r') - html = spec.fetcher.fetch(url) - spec.log('Parsing HTML...\r') - dom = parse(spec.htmlfilter(html)) - thread = Thread(dom) + thread = Thread(spec) spec.log('Counting chunks...\r') num_chunks = ilen(thread.chunkDOMs()) title = thread.title() or "chunk"