git.scottworley.com Git - paperdoorknob/blame - glowfic.py
More structure and tests around splitting the page into chunks' DOMs.
[paperdoorknob] / glowfic.py
CommitLineData
e6adf6ce
SW
1# paperdoorknob: Print glowfic
2#
3# This program is free software: you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation, version 3.
6
7
8import itertools
9
10from typing import Iterable
11
12from bs4 import BeautifulSoup
13from bs4.element import Tag
14
15# We avoid the name "post" because the Glowfic community uses the term
16# inconsistently:
17# * The Glowfic software sometimes uses "post" to refer to a whole thread
18# (e.g. in the URL), but more often uses "post" to refer to just the first
19# chunk in a thread. The non-first chunks are "replies".
20# * Readers and this software don't need to distinguish first-chunks and
21# non-first-chunks.
22# * Humans in the community tend to use "posts" to mean "chunks" ("replies"
23# in the Glowfic software's lexicon).
24
25
def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
    """Yield the DOM element of every chunk on a parsed Glowfic page.

    The first element produced is the "post-post" div located via
    ``find_next`` starting from the document body; it is followed by every
    "post-reply" div that ``find_all`` returns for the whole document.

    Both lookups run eagerly, before the iterable is handed back, so the
    assertions below fire at call time rather than during iteration.

    Raises:
        AssertionError: if the document has no body, if no "post-post" div
            is found, or if any "post-reply" match is not a ``Tag``.
    """
    # Locate the opening chunk of the thread.
    page_body = html.body
    assert page_body
    first_chunk = page_body.find_next("div", class_="post-post")
    assert isinstance(first_chunk, Tag)

    # Collect every subsequent chunk (the "replies").
    reply_chunks = html.find_all("div", class_="post-reply")
    assert all(isinstance(reply, Tag) for reply in reply_chunks)

    return itertools.chain([first_chunk], reply_chunks)