]> git.scottworley.com Git - paperdoorknob/blame - glowfic.py
New dependency: wrapfig TeX package
[paperdoorknob] / glowfic.py
CommitLineData
e6adf6ce
SW
1# paperdoorknob: Print glowfic
2#
3# This program is free software: you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation, version 3.
6
7
aa060d9b 8from dataclasses import dataclass
e6adf6ce
SW
9import itertools
10
11from typing import Iterable
12
13from bs4 import BeautifulSoup
14from bs4.element import Tag
15
aa060d9b
SW
16from images import ImageStore
17
18
@dataclass(frozen=True)
class Chunk:
    """One chunk of a Glowfic thread: its optional metadata plus content.

    Instances are immutable (the dataclass is frozen).  makeChunk fills
    the fields in from a chunk's DOM; every metadata field is None when
    makeChunk could not find the corresponding element.
    """

    # Value returned by the image store for the icon's <img> src.
    icon: str | None
    # Stripped text of the 'post-character' div.
    character: str | None
    # Stripped text of the 'post-screenname' div.
    screen_name: str | None
    # Stripped text of the 'post-author' div.
    author: str | None
    # The 'post-content' div itself, kept as a DOM element.
    content: Tag
26
e6adf6ce
SW
27# We avoid the name "post" because the Glowfic community uses the term
28# inconsistently:
29# * The Glowfic software sometimes uses "post" to refer to a whole thread
aa060d9b
SW
30# (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
31# but mostly uses "post" to refer to just the first chunk in a thread
32# (in the HTML and UI). The non-first chunks are "replies".
e6adf6ce
SW
33# * Readers and this software don't need to distinguish first-chunks and
34# non-first-chunks.
aa060d9b 35# * Humans in the community tend to use "posts" to mean chunks.
e6adf6ce
SW
36
37
def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
    """Return the DOM elements of every chunk in a parsed thread page.

    The result starts with the first 'post-post' div found after <body>,
    followed by every 'post-reply' div in the document, in the order
    find_all reports them.
    """
    body = html.body
    assert body
    first_chunk = body.find_next("div", class_="post-post")
    assert isinstance(first_chunk, Tag)

    replies = html.find_all("div", class_="post-reply")
    assert all(isinstance(reply, Tag) for reply in replies)

    return itertools.chain([first_chunk], replies)
aa060d9b
SW
52
53
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM element of a single chunk.

    All lookups use ``find`` (descendants of the element only) rather than
    ``find_next`` (anything later in the document).  A chunk that lacks an
    icon, character, screen name or author therefore gets None for that
    field instead of silently picking up the value from a later chunk.

    Args:
        chunk_dom: The element that contains one chunk's markup.
        image_store: Its ``get_image`` is called with the icon's ``src``.

    Returns:
        A Chunk whose optional fields are None when the corresponding
        element is absent from chunk_dom.

    Raises:
        AssertionError: If chunk_dom contains no 'post-content' div
            (only when assertions are enabled).
    """

    def getIcon() -> str | None:
        icon_div = chunk_dom.find('div', class_='post-icon')
        # Covers both "not found" (None) and narrows the type so the
        # nested search below is a DOM search.
        if not isinstance(icon_div, Tag):
            return None
        icon_img = icon_div.find('img')
        if icon_img is None:
            return None
        assert isinstance(icon_img, Tag)
        return image_store.get_image(icon_img.attrs['src'])

    def getTextByClass(css_class: str) -> str | None:
        div = chunk_dom.find('div', class_=css_class)
        if div is None:
            return None
        return div.text.strip()

    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(getIcon(),
                 getTextByClass('post-character'),
                 getTextByClass('post-screenname'),
                 getTextByClass('post-author'),
                 content)