git.scottworley.com Git - paperdoorknob/commitdiff
Reify Thread
author Scott Worley <scottworley@scottworley.com>
Fri, 12 Jan 2024 02:17:26 +0000 (18:17 -0800)
committer Scott Worley <scottworley@scottworley.com>
Fri, 12 Jan 2024 02:48:58 +0000 (18:48 -0800)
glowfic.py
glowfic_test.py
paperdoorknob.py

index 012bd069aca5a3066b443640bc66904d796e44ee..372b3aa05f574a20038cf4cd6661d50ebae4bcd3 100644 (file)
@@ -54,20 +54,25 @@ class Chunk:
 #  * Humans in the community tend to use "posts" to mean chunks.
 
 
-def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
-    def text() -> Tag:
-        body = html.body
-        assert body
-        text = body.find_next("div", class_="post-post")
-        assert isinstance(text, Tag)
-        return text
-
-    def the_replies() -> Iterable[Tag]:
-        rs = html.find_all("div", class_="post-reply")
-        assert all(isinstance(r, Tag) for r in rs)
-        return rs
-
-    return itertools.chain([text()], the_replies())
+class Thread:
+
+    def __init__(self, html: BeautifulSoup) -> None:
+        self._html = html
+
+    def chunkDOMs(self) -> Iterable[Tag]:
+        def text() -> Tag:
+            body = self._html.body
+            assert body
+            text = body.find_next("div", class_="post-post")
+            assert isinstance(text, Tag)
+            return text
+
+        def the_replies() -> Iterable[Tag]:
+            rs = self._html.find_all("div", class_="post-reply")
+            assert all(isinstance(r, Tag) for r in rs)
+            return rs
+
+        return itertools.chain([text()], the_replies())
 
 
 def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
index 2d01c2a7e85b2d272fb60ed4dafcb657be72440e..ed32945d50f1bcce42f507346146761c9a029468 100644 (file)
@@ -10,41 +10,41 @@ import unittest
 from bs4 import BeautifulSoup
 
 from images import FakeImageStore
-from glowfic import chunkDOMs, makeChunk
+from glowfic import makeChunk, Thread
 from texify import PandocTexifier
 
 
 class TestSplit(unittest.TestCase):
 
     def testSplit1(self) -> None:
-        soup = BeautifulSoup(b'''
+        t = Thread(BeautifulSoup(b'''
             <html><body><div class="post-container post-post">
               The "post"
-            </div></body></html>''', 'html.parser')
-        self.assertEqual([list(t.stripped_strings) for t in chunkDOMs(soup)],
+            </div></body></html>''', 'html.parser'))
+        self.assertEqual([list(t.stripped_strings) for t in t.chunkDOMs()],
                          [['The "post"']])
 
     def testSplit2(self) -> None:
-        soup = BeautifulSoup(b'''
+        t = Thread(BeautifulSoup(b'''
             <html><body>
               <div class="post-container post-post">The "post"</div>
               <div class="flat-post-replies">
                 <div class="post-container post-reply">The "reply"</div>
               </div>
-            </body></html>''', 'html.parser')
-        self.assertEqual([list(t.stripped_strings) for t in chunkDOMs(soup)],
+            </body></html>''', 'html.parser'))
+        self.assertEqual([list(t.stripped_strings) for t in t.chunkDOMs()],
                          [['The "post"'], ['The "reply"']])
 
     def testSplit3(self) -> None:
-        soup = BeautifulSoup(b'''
+        t = Thread(BeautifulSoup(b'''
             <html><body>
               <div class="post-container post-post">The "post"</div>
               <div class="flat-post-replies">
                 <div class="post-container post-reply">1st reply</div>
                 <div class="post-container post-reply">2nd reply</div>
               </div>
-            </body></html>''', 'html.parser')
-        self.assertEqual([list(t.stripped_strings) for t in chunkDOMs(soup)],
+            </body></html>''', 'html.parser'))
+        self.assertEqual([list(t.stripped_strings) for t in t.chunkDOMs()],
                          [['The "post"'], ['1st reply'], ['2nd reply']])
 
 
@@ -52,8 +52,8 @@ class TestMakeChunk(unittest.TestCase):
 
     def testEmptyContent(self) -> None:
         with open('testdata/empty-content.html', 'rb') as f:
-            soup = BeautifulSoup(f, 'html.parser')
-        c = makeChunk(next(iter(chunkDOMs(soup))), FakeImageStore())
+            t = Thread(BeautifulSoup(f, 'html.parser'))
+        c = makeChunk(next(iter(t.chunkDOMs())), FakeImageStore())
         self.assertEqual(
             c.icon,
             'stored:https://d1anwqy6ci9o1i.cloudfront.net/' +
index 1350784584dc20e3c4cee60c3bfd71321883ffe1..ade0b885d42addb973f415599798bd2e153ac961 100644 (file)
@@ -10,7 +10,7 @@ from bs4 import BeautifulSoup
 from bs4.element import Tag
 
 from args import spec_from_commandline_args
-from glowfic import chunkDOMs, flatURL, makeChunk, renderChunk
+from glowfic import flatURL, makeChunk, renderChunk, Thread
 from spec import Spec
 
 
@@ -57,10 +57,11 @@ def process(spec: Spec) -> None:
     html = spec.fetcher.fetch(url)
     spec.log('Parsing HTML...\r')
     dom = parse(spec.htmlfilter(html))
+    thread = Thread(dom)
     spec.log('Counting chunks...\r')
-    num_chunks = ilen(chunkDOMs(dom))
+    num_chunks = ilen(thread.chunkDOMs())
     title = get_title(dom) or "chunk"
-    for i, r in enumerate(chunkDOMs(dom)):
+    for i, r in enumerate(thread.chunkDOMs()):
         percent = 100.0 * i / num_chunks
         spec.log(f'Processing {title} {i} of {num_chunks} ({percent:.1f}%)\r')
         spec.domfilter(r)