Reify Thread

author Scott Worley <scottworley@scottworley.com>

Fri, 12 Jan 2024 02:17:26 +0000 (18:17 -0800)

committer Scott Worley <scottworley@scottworley.com>

Fri, 12 Jan 2024 02:48:58 +0000 (18:48 -0800)
author Scott Worley <scottworley@scottworley.com>
Fri, 12 Jan 2024 02:17:26 +0000 (18:17 -0800)
committer Scott Worley <scottworley@scottworley.com>
Fri, 12 Jan 2024 02:48:58 +0000 (18:48 -0800)
diff --git a/glowfic.py b/glowfic.py

index 012bd069aca5a3066b443640bc66904d796e44ee..372b3aa05f574a20038cf4cd6661d50ebae4bcd3 100644 (file)
--- a/glowfic.py
+++ b/glowfic.py
@@ -54,20 +54,25 @@ class Chunk:
  #  * Humans in the community tend to use "posts" to mean chunks.
  
  
-def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
-    def text() -> Tag:
-        body = html.body
-        assert body
-        text = body.find_next("div", class_="post-post")
-        assert isinstance(text, Tag)
-        return text
-
-    def the_replies() -> Iterable[Tag]:
-        rs = html.find_all("div", class_="post-reply")
-        assert all(isinstance(r, Tag) for r in rs)
-        return rs
-
-    return itertools.chain([text()], the_replies())
+class Thread:
+
+    def __init__(self, html: BeautifulSoup) -> None:
+        self._html = html
+
+    def chunkDOMs(self) -> Iterable[Tag]:
+        def text() -> Tag:
+            body = self._html.body
+            assert body
+            text = body.find_next("div", class_="post-post")
+            assert isinstance(text, Tag)
+            return text
+
+        def the_replies() -> Iterable[Tag]:
+            rs = self._html.find_all("div", class_="post-reply")
+            assert all(isinstance(r, Tag) for r in rs)
+            return rs
+
+        return itertools.chain([text()], the_replies())
  
  
  def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
diff --git a/glowfic_test.py b/glowfic_test.py

index 2d01c2a7e85b2d272fb60ed4dafcb657be72440e..ed32945d50f1bcce42f507346146761c9a029468 100644 (file)
--- a/glowfic_test.py
+++ b/glowfic_test.py
@@ -10,41 +10,41 @@ import unittest
  from bs4 import BeautifulSoup
  
  from images import FakeImageStore
-from glowfic import chunkDOMs, makeChunk
+from glowfic import makeChunk, Thread
  from texify import PandocTexifier
  
  
  class TestSplit(unittest.TestCase):
  
      def testSplit1(self) -> None:
-        soup = BeautifulSoup(b'''
+        t = Thread(BeautifulSoup(b'''
              <html><body><div class="post-container post-post">
                The "post"
-            </div></body></html>''', 'html.parser')
-        self.assertEqual([list(t.stripped_strings) for t in chunkDOMs(soup)],
+            </div></body></html>''', 'html.parser'))
+        self.assertEqual([list(t.stripped_strings) for t in t.chunkDOMs()],
                           [['The "post"']])
  
      def testSplit2(self) -> None:
-        soup = BeautifulSoup(b'''
+        t = Thread(BeautifulSoup(b'''
              <html><body>
                <div class="post-container post-post">The "post"</div>
                <div class="flat-post-replies">
                  <div class="post-container post-reply">The "reply"</div>
                </div>
-            </body></html>''', 'html.parser')
-        self.assertEqual([list(t.stripped_strings) for t in chunkDOMs(soup)],
+            </body></html>''', 'html.parser'))
+        self.assertEqual([list(t.stripped_strings) for t in t.chunkDOMs()],
                           [['The "post"'], ['The "reply"']])
  
      def testSplit3(self) -> None:
-        soup = BeautifulSoup(b'''
+        t = Thread(BeautifulSoup(b'''
              <html><body>
                <div class="post-container post-post">The "post"</div>
                <div class="flat-post-replies">
                  <div class="post-container post-reply">1st reply</div>
                  <div class="post-container post-reply">2nd reply</div>
                </div>
-            </body></html>''', 'html.parser')
-        self.assertEqual([list(t.stripped_strings) for t in chunkDOMs(soup)],
+            </body></html>''', 'html.parser'))
+        self.assertEqual([list(t.stripped_strings) for t in t.chunkDOMs()],
                           [['The "post"'], ['1st reply'], ['2nd reply']])
  
  
@@ -52,8 +52,8 @@ class TestMakeChunk(unittest.TestCase):
  
      def testEmptyContent(self) -> None:
          with open('testdata/empty-content.html', 'rb') as f:
-            soup = BeautifulSoup(f, 'html.parser')
-        c = makeChunk(next(iter(chunkDOMs(soup))), FakeImageStore())
+            t = Thread(BeautifulSoup(f, 'html.parser'))
+        c = makeChunk(next(iter(t.chunkDOMs())), FakeImageStore())
          self.assertEqual(
              c.icon,
              'stored:https://d1anwqy6ci9o1i.cloudfront.net/' +
diff --git a/paperdoorknob.py b/paperdoorknob.py

index 1350784584dc20e3c4cee60c3bfd71321883ffe1..ade0b885d42addb973f415599798bd2e153ac961 100644 (file)
--- a/paperdoorknob.py
+++ b/paperdoorknob.py
@@ -10,7 +10,7 @@ from bs4 import BeautifulSoup
  from bs4.element import Tag
  
  from args import spec_from_commandline_args
-from glowfic import chunkDOMs, flatURL, makeChunk, renderChunk
+from glowfic import flatURL, makeChunk, renderChunk, Thread
  from spec import Spec
  
  
@@ -57,10 +57,11 @@ def process(spec: Spec) -> None:
      html = spec.fetcher.fetch(url)
      spec.log('Parsing HTML...\r')
      dom = parse(spec.htmlfilter(html))
+    thread = Thread(dom)
      spec.log('Counting chunks...\r')
-    num_chunks = ilen(chunkDOMs(dom))
+    num_chunks = ilen(thread.chunkDOMs())
      title = get_title(dom) or "chunk"
-    for i, r in enumerate(chunkDOMs(dom)):
+    for i, r in enumerate(thread.chunkDOMs()):
          percent = 100.0 * i / num_chunks
          spec.log(f'Processing {title} {i} of {num_chunks} ({percent:.1f}%)\r')
          spec.domfilter(r)
author	Scott Worley <scottworley@scottworley.com>
	Fri, 12 Jan 2024 02:17:26 +0000 (18:17 -0800)
committer	Scott Worley <scottworley@scottworley.com>
	Fri, 12 Jan 2024 02:48:58 +0000 (18:48 -0800)
glowfic.py		patch | blob | blame | history
glowfic_test.py		patch | blob | blame | history
paperdoorknob.py		patch | blob | blame | history