]> git.scottworley.com Git - paperdoorknob/blob - glowfic.py
More structure and tests around splitting the page into chunks' DOMs.
[paperdoorknob] / glowfic.py
1 # paperdoorknob: Print glowfic
2 #
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
6
7
8 import itertools
9
10 from typing import Iterable
11
12 from bs4 import BeautifulSoup
13 from bs4.element import Tag
14
15 # We avoid the name "post" because the Glowfic community uses the term
16 # inconsistently:
17 # * The Glowfic software sometimes uses "post" to refer to a whole thread
18 # (eg: in the URL), but more often uses "post" to refer to just the first
19 # chunk in a thread. The non-first chunks are "replies".
20 # * Readers and this software don't need to distinguish first-chunks and
21 # non-first-chunks.
22 # * Humans in the community tend to use "posts" to mean "chunks" ("replies"
23 # in the Glowfic software's lexicon).
24
25
def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
    """Return the DOM elements of a page's chunks, first chunk first.

    The first chunk is the first ``div`` with class ``post-post`` found
    after the document's ``<body>``; the remaining chunks are every ``div``
    with class ``post-reply`` in the document, in the order ``find_all``
    reports them.

    Args:
        html: The parsed page.

    Returns:
        An iterable that produces the first chunk's element followed by
        each reply element.

    Raises:
        AssertionError: If the document has no ``<body>``, if no
            ``post-post`` div is found, or if any match is not a ``Tag``.
    """
    page_body = html.body
    assert page_body

    # The opening chunk uses a different CSS class from the replies, so it
    # is looked up separately.
    first_chunk = page_body.find_next("div", class_="post-post")
    assert isinstance(first_chunk, Tag)

    reply_chunks = html.find_all("div", class_="post-reply")
    # The isinstance checks narrow bs4's loosely-typed results to Tag.
    assert all(isinstance(reply, Tag) for reply in reply_chunks)

    return itertools.chain([first_chunk], reply_chunks)