--- /dev/null
+# paperdoorknob: Print glowfic
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, version 3.
+
+
+import itertools
+
+from typing import Iterable
+
+from bs4 import BeautifulSoup
+from bs4.element import Tag
+
+# We avoid the name "post" because the Glowfic community uses the term
+# inconsistently, and say "chunk" instead:
+#  * The Glowfic software sometimes uses "post" to refer to a whole thread
+#    (e.g., in the URL), but more often uses "post" to refer to just the
+#    first chunk in a thread.  The non-first chunks are "replies".
+#  * Readers and this software don't need to distinguish first chunks
+#    from non-first chunks.
+#  * Humans in the community tend to use "posts" to mean "chunks"
+#    ("replies" in the Glowfic software's lexicon).
+
+
+def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
+    """Return the chunks of a parsed thread page, opening chunk first.
+
+    The opening chunk is the first ``div`` with class ``post-post`` found
+    after the document's ``<body>``; it is followed by every ``div`` with
+    class ``post-reply`` found in the document.
+
+    Raises AssertionError if the document has no ``<body>``, if no
+    ``post-post`` div is found, or if any match is not a ``Tag``.
+    """
+    body = html.body
+    assert body
+
+    # The isinstance checks narrow bs4's loosely typed search results.
+    opening = body.find_next("div", class_="post-post")
+    assert isinstance(opening, Tag)
+
+    followups = html.find_all("div", class_="post-reply")
+    assert all(isinstance(followup, Tag) for followup in followups)
+
+    return itertools.chain([opening], followups)
--- /dev/null
+# paperdoorknob: Print glowfic
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, version 3.
+
+
+import unittest
+
+from bs4 import BeautifulSoup
+
+from glowfic import chunkDOMs
+
+
+class TestSplit(unittest.TestCase):
+    """Exercise chunkDOMs on small hand-written thread pages."""
+
+    def testSplit1(self) -> None:
+        # A page holding only the opening chunk.
+        page = BeautifulSoup(b'''
+ <html><body><div class="post-container post-post">
+ The "post"
+ </div></body></html>''', 'html.parser')
+        found = [list(chunk.stripped_strings) for chunk in chunkDOMs(page)]
+        self.assertEqual(found, [['The "post"']])
+
+    def testSplit2(self) -> None:
+        # The opening chunk followed by a single reply.
+        page = BeautifulSoup(b'''
+ <html><body>
+ <div class="post-container post-post">The "post"</div>
+ <div class="flat-post-replies">
+ <div class="post-container post-reply">The "reply"</div>
+ </div>
+ </body></html>''', 'html.parser')
+        found = [list(chunk.stripped_strings) for chunk in chunkDOMs(page)]
+        self.assertEqual(found, [['The "post"'], ['The "reply"']])
+
+    def testSplit3(self) -> None:
+        # The opening chunk followed by two replies, in document order.
+        page = BeautifulSoup(b'''
+ <html><body>
+ <div class="post-container post-post">The "post"</div>
+ <div class="flat-post-replies">
+ <div class="post-container post-reply">1st reply</div>
+ <div class="post-container post-reply">2nd reply</div>
+ </div>
+ </body></html>''', 'html.parser')
+        found = [list(chunk.stripped_strings) for chunk in chunkDOMs(page)]
+        self.assertEqual(found,
+                         [['The "post"'], ['1st reply'], ['2nd reply']])
+
+
+# Run the test cases when this file is executed directly as a script.
+if __name__ == '__main__':
+ unittest.main()
# Free Software Foundation, version 3.
-import itertools
-
-from typing import Iterable
-
from bs4 import BeautifulSoup
-from bs4.element import Tag
from args import spec_from_commandline_args
+from glowfic import chunkDOMs
from spec import Spec
return BeautifulSoup(content, 'html.parser')
-def replies(html: BeautifulSoup) -> Iterable[Tag]:
- def text() -> Tag:
- body = html.body
- assert body
- text = body.find_next("div", class_="post-post")
- assert isinstance(text, Tag)
- return text
-
- def the_replies() -> Iterable[Tag]:
- rs = html.find_all("div", class_="post-reply")
- assert all(isinstance(r, Tag) for r in rs)
- return rs
-
- return itertools.chain([text()], the_replies())
-
-
def process(spec: Spec) -> None:
spec.texout.write(b'\\documentclass{article}\n')
if spec.geometry is not None:
b']{geometry}\n')
spec.texout.write(b'\\begin{document}\n')
html = parse(spec.htmlfilter(spec.fetcher.fetch(spec.url)))
- for r in replies(html):
+ for r in chunkDOMs(html):
spec.domfilter(r)
spec.texout.write(spec.texifier.texify(r))
spec.texout.write(b'\\end{document}\n')
def fetcher(self) -> Fetcher:
raise NotImplementedError()
- def testReplies(self) -> None:
- replies = list(paperdoorknob.replies(
- paperdoorknob.parse(self.fetcher().fetch(self.url()))))
- for r in replies:
- ApplyDOMFilters('NoEdit,NoFooter', r)
- assert [r.text.strip() for r in replies] == [
- "This is glowfic",
- "You sure?",
- "Pretty sure."]
-
def testProcess(self) -> None:
buf = io.BytesIO()
spec = Spec(
'args',
'domfilter',
'fetch',
+ 'glowfic',
'htmlfilter',
'images',
'paperdoorknob',