From 70adfbff16123a62d6b25f8ead23789d831d1d70 Mon Sep 17 00:00:00 2001 From: Scott Worley Date: Thu, 11 Jan 2024 19:30:21 -0800 Subject: [PATCH 1/1] Optionally have Thread.__init__ fetch the HTML --- glowfic.py | 12 ++++++++++-- paperdoorknob.py | 15 ++------------- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/glowfic.py b/glowfic.py index 92ae4ab..8029b5d 100644 --- a/glowfic.py +++ b/glowfic.py @@ -15,6 +15,7 @@ from bs4 import BeautifulSoup from bs4.element import Tag from images import ImageStore +from spec import Spec from texify import Texifier @@ -56,8 +57,15 @@ class Chunk: class Thread: - def __init__(self, dom: BeautifulSoup) -> None: - self._dom = dom + def __init__(self, thing: BeautifulSoup | Spec) -> None: + if isinstance(thing, Spec): + spec = thing + spec.log('Fetching HTML...\r') + html = spec.fetcher.fetch(flatURL(spec.url)) + spec.log('Parsing HTML...\r') + self._dom = BeautifulSoup(spec.htmlfilter(html), 'html.parser') + else: + self._dom = thing def title(self) -> str | None: span = self._dom.findChild("span", id="post-title") diff --git a/paperdoorknob.py b/paperdoorknob.py index cf50057..d545f61 100644 --- a/paperdoorknob.py +++ b/paperdoorknob.py @@ -6,17 +6,11 @@ from typing import Any, Iterable -from bs4 import BeautifulSoup - from args import spec_from_commandline_args -from glowfic import flatURL, makeChunk, renderChunk, Thread +from glowfic import makeChunk, renderChunk, Thread from spec import Spec -def parse(content: bytes) -> BeautifulSoup: - return BeautifulSoup(content, 'html.parser') - - def ilen(it: Iterable[Any]) -> int: return sum(1 for _ in it) @@ -44,12 +38,7 @@ def process(spec: Spec) -> None: \detokenize{#1}\relax\else #2\fi} %s ''' % (spec.icon_size, spec.layout)) - url = flatURL(spec.url) - spec.log('Fetching HTML...\r') - html = spec.fetcher.fetch(url) - spec.log('Parsing HTML...\r') - dom = parse(spec.htmlfilter(html)) - thread = Thread(dom) + thread = Thread(spec) spec.log('Counting chunks...\r') num_chunks = ilen(thread.chunkDOMs()) title = thread.title() or "chunk" -- 2.44.1