]> git.scottworley.com Git - paperdoorknob/blob - glowfic.py
Rename html → dom
[paperdoorknob] / glowfic.py
1 # paperdoorknob: Print glowfic
2 #
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
6
7
8 from dataclasses import dataclass
9 import itertools
10 from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
11
12 from typing import Iterable
13
14 from bs4 import BeautifulSoup
15 from bs4.element import Tag
16
17 from images import ImageStore
18 from texify import Texifier
19
20
21 def _removeViewFromURL(url: str) -> str:
22 u = urlparse(url)
23 old_qs = parse_qsl(u.query)
24 new_qs = [(k, v) for k, v in old_qs if k != 'view']
25 return urlunparse(u._replace(query=urlencode(new_qs)))
26
27
def nonFlatURL(url: str) -> str:
    """Return url without any 'view' query parameter."""
    without_view = _removeViewFromURL(url)
    return without_view
30
31
def flatURL(url: str) -> str:
    """Return url with its 'view' query parameter forced to 'flat'.

    Any existing 'view' parameters are dropped and a single
    ``view=flat`` is appended after the remaining parameters, which keep
    their original order.  Parameters with blank values (e.g. ``?q=``)
    are preserved.
    """
    parts = urlparse(url)
    # Parse the query once, with keep_blank_values=True so that
    # blank-valued parameters are not silently discarded by parse_qsl.
    params = [(key, value)
              for key, value in parse_qsl(parts.query, keep_blank_values=True)
              if key != 'view']
    params.append(('view', 'flat'))
    return urlunparse(parts._replace(query=urlencode(params)))
36
37
@dataclass(frozen=True)
class Chunk:
    """One chunk of a thread: header information plus its content."""
    # As built by makeChunk: the value ImageStore.get_image returned for
    # the icon image's src, or None when the chunk has no icon image.
    icon: str | None
    # As built by makeChunk: the chunk's 'post-character' div, or None
    # when the chunk has no such div.
    character: Tag | None
    # As built by makeChunk: the chunk's 'post-screenname' div, or None
    # when the chunk has no such div.
    screen_name: Tag | None
    # As built by makeChunk: the chunk's 'post-author' div, or None when
    # the chunk has no such div.
    author: Tag | None
    # As built by makeChunk: the chunk's 'post-content' div (always
    # present).
    content: Tag
45
46 # We avoid the name "post" because the Glowfic community uses the term
47 # inconsistently:
48 # * The Glowfic software sometimes uses "post" to refer to a whole thread
49 # (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
50 # but mostly uses "post" to refer to just the first chunk in a thread
51 # (in the HTML and UI). The non-first chunks are "replies".
52 # * Readers and this software don't need to distinguish first-chunks and
53 # non-first-chunks.
54 # * Humans in the community tend to use "posts" to mean chunks.
55
56
class Thread:
    """Wraps the parsed DOM of a thread page and exposes its chunks."""

    def __init__(self, dom: BeautifulSoup) -> None:
        self._dom = dom

    def chunkDOMs(self) -> Iterable[Tag]:
        """Return the DOM of the first chunk followed by those of the replies.

        The first chunk is the first 'post-post' div found after the
        document's <body>; the replies are every 'post-reply' div in the
        document.  Both lookups happen when this method is called, not
        when the result is iterated.
        """
        # The opening chunk of the thread.
        page_body = self._dom.body
        assert page_body
        first_chunk = page_body.find_next("div", class_="post-post")
        assert isinstance(first_chunk, Tag)

        # Every subsequent chunk.
        replies = self._dom.find_all("div", class_="post-reply")
        assert all(isinstance(reply, Tag) for reply in replies)

        return itertools.chain([first_chunk], replies)
76
77
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM of a single chunk.

    Args:
        chunk_dom: Element containing the chunk's 'post-icon',
            'post-character', 'post-screenname', 'post-author' and
            'post-content' divs.  Only 'post-content' is required.
        image_store: Used to resolve the icon image's src via
            get_image().

    Returns:
        A Chunk whose optional fields are None when the corresponding
        div (or, for the icon, the <img> inside it) is absent.

    Note:
        Links inside the character, screen-name and author divs have
        their href attribute removed in place, so chunk_dom is modified.
    """
    # find()/find_all() are used instead of the deprecated
    # findChild()/findChildren() aliases; they are the same methods.

    def get_icon() -> str | None:
        icon_div = chunk_dom.find('div', class_='post-icon')
        if icon_div is None:
            return None
        assert isinstance(icon_div, Tag)
        icon_img = icon_div.find('img')
        if icon_img is None:
            return None
        assert isinstance(icon_img, Tag)
        return image_store.get_image(icon_img.attrs['src'])

    def strip_hrefs(tag: Tag) -> None:
        # Drop the link targets of every <a> under tag, keeping the
        # elements themselves.
        for link in tag.find_all('a'):
            if 'href' in link.attrs:
                del link.attrs['href']

    def get_meta(css_class: str) -> Tag | None:
        tag = chunk_dom.find('div', class_=css_class)
        if tag is None:
            return None
        assert isinstance(tag, Tag)
        strip_hrefs(tag)
        return tag

    # Check for the required content before resolving the icon or
    # touching any of the header divs.
    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(get_icon(),
                 get_meta('post-character'),
                 get_meta('post-screenname'),
                 get_meta('post-author'),
                 content)
116
117
def renderChunk(texifier: Texifier, chunk: Chunk) -> bytes:
    r"""Render chunk as a \glowhead header followed by its texified content.

    The header is emitted as
    \glowhead{<icon>}{<character>}{<screen name>}{<author>}, where the
    icon argument is a \glowicon{...} command and the other three are
    the texified header elements.  Any element the chunk lacks is
    emitted as an empty argument.
    """

    def texified(tag: Tag | None) -> bytes:
        return texifier.texify(tag) if tag else b''

    if chunk.icon:
        icon = br'\glowicon{%s}' % chunk.icon.encode('UTF-8')
    else:
        icon = b''

    header_args = [
        icon,
        texified(chunk.character),
        texified(chunk.screen_name),
        texified(chunk.author),
    ]
    body = texifier.texify(chunk.content)
    return b''.join([br'\glowhead{', b'}{'.join(header_args), b'}', body])
130
131
# LaTeX definition of \glowhead for a layout with no chunk headers: the
# command accepts its four arguments and expands to nothing, so only the
# content that renderChunk emits after the header is typeset.
ContentOnlyLayout = br'''
\newcommand{\glowhead}[4]{}
'''
135
136
# LaTeX definition of \glowhead that typesets the header in a framed,
# centered box inside a left-placed wrapstuff environment.  The four
# arguments are stacked in order (#1 first), each non-empty one followed
# by a line break, so #2-#4 appear below #1 (the icon argument that
# renderChunk emits).
BelowIconLayout = br'''
\newcommand{\glowhead}[4]{\wrapstuffclear
\begin{wrapstuff}[l]
\fbox{
\begin{varwidth}{0.5\textwidth}
\smash{\parbox[t][0pt]{0pt}{
\setlength{\fboxrule}{0.2pt}
\setlength{\fboxsep}{0pt}
\vspace{-3.4pt}
\fbox{\hspace{107mm}}
}\\*}
\vspace{-1em}
\begin{center}
#1\ifnotempty
{#1}{\\*}#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
\end{varwidth}
}
\end{wrapstuff}

\strut

\noindent}'''
162
163
# LaTeX definition of \glowhead that typesets the header as one framed
# box: #1 (the icon argument that renderChunk emits) followed by a
# bottom-aligned \parbox holding #2-#4 centered, one per line.
#
# Why is \textwidth not the width of the text?
# Why is the width of the text .765\textwidth?
BesideIconLayout = br'''
\newcommand{\glowhead}[4]{

\strut

\noindent\fbox{
#1
\parbox[b]{.765\textwidth}{
\begin{center}
#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
}
}\\*
\vspace{-0.75em}\\*
\noindent}'''
180 }\\*
181 \vspace{-0.75em}\\*
182 \noindent}'''