]> git.scottworley.com Git - paperdoorknob/commitdiff
More structure and tests around splitting the page into chunks' DOMs.
authorScott Worley <scottworley@scottworley.com>
Wed, 20 Dec 2023 19:43:09 +0000 (11:43 -0800)
committerScott Worley <scottworley@scottworley.com>
Wed, 20 Dec 2023 19:43:09 +0000 (11:43 -0800)
glowfic.py [new file with mode: 0644]
glowfic_test.py [new file with mode: 0644]
paperdoorknob.py
paperdoorknob_test.py
setup.py

diff --git a/glowfic.py b/glowfic.py
new file mode 100644 (file)
index 0000000..901246b
--- /dev/null
@@ -0,0 +1,39 @@
+# paperdoorknob: Print glowfic
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, version 3.
+
+
+import itertools
+
+from typing import Iterable
+
+from bs4 import BeautifulSoup
+from bs4.element import Tag
+
+# We avoid the name "post" because the Glowfic community uses the term
+# inconsistently:
+#  * The Glowfic software sometimes uses "post" to refer to a whole thread
+#    (eg: in the URL), but more often uses "post" to refer to just the first
+#    chunk in a thread.  The non-first chunks are "replies".
+#  * Readers and this software don't need to distinguish first-chunks and
+#    non-first-chunks.
+#  * Humans in the community tend to use "posts" to mean "chunks" ("replies"
+#    in the Glowfic software's lexicon).
+
+
+def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
+    def text() -> Tag:
+        body = html.body
+        assert body
+        text = body.find_next("div", class_="post-post")
+        assert isinstance(text, Tag)
+        return text
+
+    def the_replies() -> Iterable[Tag]:
+        rs = html.find_all("div", class_="post-reply")
+        assert all(isinstance(r, Tag) for r in rs)
+        return rs
+
+    return itertools.chain([text()], the_replies())
diff --git a/glowfic_test.py b/glowfic_test.py
new file mode 100644 (file)
index 0000000..d847333
--- /dev/null
@@ -0,0 +1,50 @@
+# paperdoorknob: Print glowfic
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, version 3.
+
+
+import unittest
+
+from bs4 import BeautifulSoup
+
+from glowfic import chunkDOMs
+
+
+class TestSplit(unittest.TestCase):
+
+    def testSplit1(self) -> None:
+        soup = BeautifulSoup(b'''
+            <html><body><div class="post-container post-post">
+              The "post"
+            </div></body></html>''', 'html.parser')
+        self.assertEqual([list(t.stripped_strings) for t in chunkDOMs(soup)],
+                         [['The "post"']])
+
+    def testSplit2(self) -> None:
+        soup = BeautifulSoup(b'''
+            <html><body>
+              <div class="post-container post-post">The "post"</div>
+              <div class="flat-post-replies">
+                <div class="post-container post-reply">The "reply"</div>
+              </div>
+            </body></html>''', 'html.parser')
+        self.assertEqual([list(t.stripped_strings) for t in chunkDOMs(soup)],
+                         [['The "post"'], ['The "reply"']])
+
+    def testSplit3(self) -> None:
+        soup = BeautifulSoup(b'''
+            <html><body>
+              <div class="post-container post-post">The "post"</div>
+              <div class="flat-post-replies">
+                <div class="post-container post-reply">1st reply</div>
+                <div class="post-container post-reply">2nd reply</div>
+              </div>
+            </body></html>''', 'html.parser')
+        self.assertEqual([list(t.stripped_strings) for t in chunkDOMs(soup)],
+                         [['The "post"'], ['1st reply'], ['2nd reply']])
+
+
+# Run this module's tests when it is executed directly as a script.
+if __name__ == '__main__':
+    unittest.main()
index 26eb7be054a823b6183bea6dff3415dfef713a6a..84ed70228d3d9b4040fc765c3ced2be68d1d58ea 100644 (file)
@@ -5,14 +5,10 @@
 # Free Software Foundation, version 3.
 
 
 # Free Software Foundation, version 3.
 
 
-import itertools
-
-from typing import Iterable
-
 from bs4 import BeautifulSoup
 from bs4 import BeautifulSoup
-from bs4.element import Tag
 
 from args import spec_from_commandline_args
 
 from args import spec_from_commandline_args
+from glowfic import chunkDOMs
 from spec import Spec
 
 
 from spec import Spec
 
 
@@ -20,22 +16,6 @@ def parse(content: bytes) -> BeautifulSoup:
     return BeautifulSoup(content, 'html.parser')
 
 
     return BeautifulSoup(content, 'html.parser')
 
 
-def replies(html: BeautifulSoup) -> Iterable[Tag]:
-    def text() -> Tag:
-        body = html.body
-        assert body
-        text = body.find_next("div", class_="post-post")
-        assert isinstance(text, Tag)
-        return text
-
-    def the_replies() -> Iterable[Tag]:
-        rs = html.find_all("div", class_="post-reply")
-        assert all(isinstance(r, Tag) for r in rs)
-        return rs
-
-    return itertools.chain([text()], the_replies())
-
-
 def process(spec: Spec) -> None:
     spec.texout.write(b'\\documentclass{article}\n')
     if spec.geometry is not None:
 def process(spec: Spec) -> None:
     spec.texout.write(b'\\documentclass{article}\n')
     if spec.geometry is not None:
@@ -44,7 +24,7 @@ def process(spec: Spec) -> None:
                           b']{geometry}\n')
     spec.texout.write(b'\\begin{document}\n')
     html = parse(spec.htmlfilter(spec.fetcher.fetch(spec.url)))
                           b']{geometry}\n')
     spec.texout.write(b'\\begin{document}\n')
     html = parse(spec.htmlfilter(spec.fetcher.fetch(spec.url)))
-    for r in replies(html):
+    for r in chunkDOMs(html):
         spec.domfilter(r)
         spec.texout.write(spec.texifier.texify(r))
     spec.texout.write(b'\\end{document}\n')
         spec.domfilter(r)
         spec.texout.write(spec.texifier.texify(r))
     spec.texout.write(b'\\end{document}\n')
index 65fc2a9f308fbba82657389a87dd9b0971b312da..249b6e42e936fa5f60a18e021c133dc824ca15a6 100644 (file)
@@ -32,16 +32,6 @@ class BaseTestProcess(ABC):
     def fetcher(self) -> Fetcher:
         raise NotImplementedError()
 
     def fetcher(self) -> Fetcher:
         raise NotImplementedError()
 
-    def testReplies(self) -> None:
-        replies = list(paperdoorknob.replies(
-            paperdoorknob.parse(self.fetcher().fetch(self.url()))))
-        for r in replies:
-            ApplyDOMFilters('NoEdit,NoFooter', r)
-        assert [r.text.strip() for r in replies] == [
-            "This is glowfic",
-            "You sure?",
-            "Pretty sure."]
-
     def testProcess(self) -> None:
         buf = io.BytesIO()
         spec = Spec(
     def testProcess(self) -> None:
         buf = io.BytesIO()
         spec = Spec(
index 1ef11e6e9ccebbd71f57eb57453bc64cecc3b1c4..21527392028491df1dfffc0d1b5e870662223d03 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -11,6 +11,7 @@ setup(
         'args',
         'domfilter',
         'fetch',
         'args',
         'domfilter',
         'fetch',
+        'glowfic',
         'htmlfilter',
         'images',
         'paperdoorknob',
         'htmlfilter',
         'images',
         'paperdoorknob',