]> git.scottworley.com Git - paperdoorknob/blame_incremental - glowfic.py
New dependency: wrapfig TeX package
[paperdoorknob] / glowfic.py
... / ...
CommitLineData
1# paperdoorknob: Print glowfic
2#
3# This program is free software: you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation, version 3.
6
7
8from dataclasses import dataclass
9import itertools
10
11from typing import Iterable
12
13from bs4 import BeautifulSoup
14from bs4.element import Tag
15
16from images import ImageStore
17
18
@dataclass(frozen=True)
class Chunk:
    """One chunk of a Glowfic thread, as extracted by makeChunk.

    Instances are immutable (frozen dataclass).  Every field except
    ``content`` is None when makeChunk finds no matching element.
    """
    # Result of ImageStore.get_image() for the icon <img>'s src attribute.
    icon: str | None
    # Stripped text of the 'post-character' div.
    character: str | None
    # Stripped text of the 'post-screenname' div.
    screen_name: str | None
    # Stripped text of the 'post-author' div.
    author: str | None
    # The 'post-content' div itself, kept as a parsed Tag.
    content: Tag
26
27# We avoid the name "post" because the Glowfic community uses the term
28# inconsistently:
29# * The Glowfic software sometimes uses "post" to refer to a whole thread
30# (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
31# but mostly uses "post" to refer to just the first chunk in a thread
32# (in the HTML and UI). The non-first chunks are "replies".
33# * Readers and this software don't need to distinguish first-chunks and
34# non-first-chunks.
35# * Humans in the community tend to use "posts" to mean chunks.
36
37
def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
    """Return the DOM elements of every chunk in a parsed thread page.

    The result starts with the first 'post-post' div found after the
    document's <body>, followed by every 'post-reply' div in the document.

    Asserts that the document has a body, that a 'post-post' div exists,
    and that every match is a Tag.
    """
    document_body = html.body
    assert document_body
    first_chunk = document_body.find_next("div", class_="post-post")
    assert isinstance(first_chunk, Tag)

    reply_chunks = html.find_all("div", class_="post-reply")
    for reply in reply_chunks:
        assert isinstance(reply, Tag)

    return itertools.chain([first_chunk], reply_chunks)
52
53
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM element of a single chunk.

    All lookups use find(), which searches only the descendants of
    chunk_dom.  find_next() would keep scanning the rest of the document,
    so a chunk lacking (say) an icon or character would silently pick up
    the one belonging to a later chunk.

    Args:
        chunk_dom: The element containing one chunk's markup.
        image_store: Its get_image() is called with the icon <img>'s
            src attribute to produce the Chunk's icon value.

    Returns:
        A Chunk.  icon, character, screen_name and author are None when
        chunk_dom contains no corresponding element.

    Raises:
        AssertionError: If chunk_dom contains no 'post-content' div
            (only when assertions are enabled).
    """

    def getIcon() -> str | None:
        icon_div = chunk_dom.find('div', class_='post-icon')
        if icon_div is None:
            return None
        assert isinstance(icon_div, Tag)
        # Search inside the icon div only; an icon div without an <img>
        # must not borrow an image from later in the page.
        icon_img = icon_div.find('img')
        if icon_img is None:
            return None
        assert isinstance(icon_img, Tag)
        return image_store.get_image(icon_img.attrs['src'])

    def getTextByClass(css_class: str) -> str | None:
        div = chunk_dom.find('div', class_=css_class)
        if div is None:
            return None
        return div.text.strip()

    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(getIcon(),
                 getTextByClass('post-character'),
                 getTextByClass('post-screenname'),
                 getTextByClass('post-author'),
                 content)