]> git.scottworley.com Git - paperdoorknob/blob - glowfic.py
Reify Chunk
[paperdoorknob] / glowfic.py
1 # paperdoorknob: Print glowfic
2 #
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
6
7
8 from dataclasses import dataclass
9 import itertools
10
11 from typing import Iterable
12
13 from bs4 import BeautifulSoup
14 from bs4.element import Tag
15
16 from images import ImageStore
17
18
@dataclass(frozen=True)
class Chunk:
    """One chunk of a Glowfic thread: its optional metadata plus its content.

    Instances are immutable (frozen dataclass).  makeChunk in this module
    builds them from a chunk's DOM element.
    """
    # Result of ImageStore.get_image for the icon's <img> src, or None when
    # no icon was found.
    icon: str | None
    # Stripped text of the 'post-character' div, or None when absent.
    character: str | None
    # Stripped text of the 'post-screenname' div, or None when absent.
    screen_name: str | None
    # Stripped text of the 'post-author' div, or None when absent.
    author: str | None
    # The 'post-content' div itself, kept as a DOM node rather than as text.
    content: Tag
26
27 # We avoid the name "post" because the Glowfic community uses the term
28 # inconsistently:
29 # * The Glowfic software sometimes uses "post" to refer to a whole thread
30 # (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
31 # but mostly uses "post" to refer to just the first chunk in a thread
32 # (in the HTML and UI). The non-first chunks are "replies".
33 # * Readers and this software don't need to distinguish first-chunks and
34 # non-first-chunks.
35 # * Humans in the community tend to use "posts" to mean chunks.
36
37
def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
    """Return the DOM elements of all chunks found in a parsed thread page.

    The result starts with the first "post-post" div located after <body>
    and continues with every "post-reply" div in the document, in the order
    find_all reports them.

    Raises AssertionError if the document has no <body>, if no "post-post"
    div is found, or if any reply match is not a Tag.
    """
    body = html.body
    assert body

    # The thread's opening chunk is marked "post-post"; the rest are replies.
    opening_chunk = body.find_next("div", class_="post-post")
    assert isinstance(opening_chunk, Tag)

    reply_chunks = html.find_all("div", class_="post-reply")
    assert all(isinstance(reply, Tag) for reply in reply_chunks)

    return itertools.chain([opening_chunk], reply_chunks)
52
53
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM element of a single post or reply.

    All lookups are restricted to descendants of chunk_dom (Tag.find).
    Using find_next here would keep scanning the rest of the document, so a
    chunk lacking an icon, character, screen name or author would silently
    pick up the value belonging to a later chunk.

    Args:
        chunk_dom: The element wrapping one chunk (as yielded by chunkDOMs).
        image_store: Used to resolve the icon's <img> src via get_image.

    Returns:
        A Chunk whose optional fields are None when the corresponding
        element is absent from this chunk.

    Raises:
        AssertionError: if the chunk contains no 'post-content' div.
        KeyError: if the icon <img> has no 'src' attribute.
    """

    def getIcon() -> str | None:
        # Icon markup is a 'post-icon' div wrapping an <img>; either may be
        # missing, in which case the chunk simply has no icon.
        icon_div = chunk_dom.find('div', class_='post-icon')
        if not isinstance(icon_div, Tag):
            return None
        icon_img = icon_div.find('img')
        if not isinstance(icon_img, Tag):
            return None
        return image_store.get_image(icon_img.attrs['src'])

    def getTextByClass(css_class: str) -> str | None:
        # Stripped text of this chunk's div with the given class, if any.
        div = chunk_dom.find('div', class_=css_class)
        if div is None:
            return None
        return div.text.strip()

    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(getIcon(),
                 getTextByClass('post-character'),
                 getTextByClass('post-screenname'),
                 getTextByClass('post-author'),
                 content)