]> git.scottworley.com Git - paperdoorknob/blame - glowfic.py
Learning TeX: Render icon images with TeX command
[paperdoorknob] / glowfic.py
CommitLineData
e6adf6ce
SW
1# paperdoorknob: Print glowfic
2#
3# This program is free software: you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation, version 3.
6
7
d2a41ff4 8from abc import ABC, abstractmethod
aa060d9b 9from dataclasses import dataclass
e6adf6ce 10import itertools
1452f8d3 11from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
e6adf6ce
SW
12
13from typing import Iterable
14
15from bs4 import BeautifulSoup
16from bs4.element import Tag
17
aa060d9b 18from images import ImageStore
d2a41ff4 19from texify import Texifier
aa060d9b
SW
20
21
1452f8d3
SW
def _removeViewFromURL(url: str) -> str:
    """Return *url* with every ``view`` query parameter removed.

    All other query parameters are kept in their original order, including
    parameters whose value is empty (``?a=``); ``parse_qsl`` would drop
    those by default.  The query string is re-encoded with ``urlencode``.
    """
    parts = urlparse(url)
    kept = [(key, value)
            for key, value in parse_qsl(parts.query, keep_blank_values=True)
            if key != 'view']
    return urlunparse(parts._replace(query=urlencode(kept)))
27
28
def nonFlatURL(url: str) -> str:
    """Return *url* without any ``view`` query parameter."""
    stripped = _removeViewFromURL(url)
    return stripped
31
32
def flatURL(url: str) -> str:
    """Return *url* with its query forced to end in ``view=flat``.

    Any existing ``view`` parameters are discarded and a single
    ``view=flat`` is appended after the remaining parameters.  Parameters
    with empty values are preserved (``parse_qsl`` would drop them by
    default).
    """
    parts = urlparse(url)
    # Filter here rather than round-tripping through a second
    # unparse/parse cycle: one parse is enough to rebuild the query.
    params = [(key, value)
              for key, value in parse_qsl(parts.query, keep_blank_values=True)
              if key != 'view']
    params.append(('view', 'flat'))
    return urlunparse(parts._replace(query=urlencode(params)))
37
38
aa060d9b
SW
@dataclass(frozen=True)
class Chunk:
    """Immutable record of the pieces of one chunk that layouts render.

    Instances are built by ``makeChunk`` from the DOM of a single post or
    reply.
    """

    # Value returned by ImageStore.get_image for the chunk's icon, or None
    # when the chunk has no icon image.
    icon: str | None
    # The 'post-character', 'post-screenname' and 'post-author' divs of the
    # chunk; each is None when the chunk has no such div.
    character: Tag | None
    screen_name: Tag | None
    author: Tag | None
    # The 'post-content' div; always present.
    content: Tag
46
e6adf6ce
SW
47# We avoid the name "post" because the Glowfic community uses the term
48# inconsistently:
49# * The Glowfic software sometimes uses "post" to refer to a whole thread
aa060d9b
SW
50# (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
51# but mostly uses "post" to refer to just the first chunk in a thread
52# (in the HTML and UI). The non-first chunks are "replies".
e6adf6ce
SW
53# * Readers and this software don't need to distinguish first-chunks and
54# non-first-chunks.
aa060d9b 55# * Humans in the community tend to use "posts" to mean chunks.
e6adf6ce
SW
56
57
def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
    """Return the DOM of every chunk in a parsed thread page.

    The result starts with the first ``div.post-post`` found after the
    document body, followed by every ``div.post-reply`` in the document.
    Both lookups run (and are asserted to have produced Tags) before this
    function returns, not lazily during iteration.
    """
    body = html.body
    assert body
    first_chunk = body.find_next("div", class_="post-post")
    assert isinstance(first_chunk, Tag)

    replies = html.find_all("div", class_="post-reply")
    assert all(isinstance(reply, Tag) for reply in replies)

    return itertools.chain([first_chunk], replies)
aa060d9b
SW
72
73
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM of a single post or reply.

    * icon: ``image_store.get_image`` applied to the ``src`` of the first
      ``<img>`` inside the ``post-icon`` div; None when either the div or
      the image is missing.
    * character / screen_name / author: the ``post-character``,
      ``post-screenname`` and ``post-author`` divs, or None when absent.
    * content: the ``post-content`` div, asserted to exist.

    NOTE: the ``href`` attribute of every ``<a>`` inside the three metadata
    divs is deleted in place, so ``chunk_dom`` is modified by this call.
    """

    def find_div(css_class: str) -> Tag | None:
        # find() is the method the old findChild() name aliases.
        found = chunk_dom.find('div', class_=css_class)
        assert found is None or isinstance(found, Tag)
        return found

    def get_icon() -> str | None:
        icon_div = find_div('post-icon')
        if icon_div is None:
            return None
        icon_img = icon_div.find('img')
        if icon_img is None:
            return None
        assert isinstance(icon_img, Tag)
        return image_store.get_image(icon_img.attrs['src'])

    def get_meta(css_class: str) -> Tag | None:
        tag = find_div(css_class)
        if tag is not None:
            # Strip link targets from the metadata divs.
            for link in tag.find_all('a'):
                link.attrs.pop('href', None)
        return tag

    # Locate the content first so a malformed chunk fails before any image
    # is requested from the store.
    content = find_div('post-content')
    assert isinstance(content, Tag)

    return Chunk(get_icon(),
                 get_meta('post-character'),
                 get_meta('post-screenname'),
                 get_meta('post-author'),
                 content)
d2a41ff4
SW
112
113
d2a41ff4
SW
class Layout(ABC):
    """Abstract interface for rendering a Chunk to bytes."""

    @abstractmethod
    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the rendered form of *chunk*; subclasses must override."""
        raise NotImplementedError
119
120
class ContentOnlyLayout(Layout):
    """Layout that renders only a chunk's content, ignoring icon and metadata."""

    def __init__(self, texifier: Texifier) -> None:
        """Store the texifier used to convert chunk content."""
        self._texifier = texifier

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the texified content of *chunk* followed by a newline."""
        body = self._texifier.texify(chunk.content)
        return body + b'\n'
d2a41ff4
SW
128
129
class BelowIconLayout(Layout):
    """Layout that emits a wrapstuff box of icon and metadata, then content.

    The icon (as a ``\\glowicon`` command) and the texified character,
    screen name and author are joined with ``\\\\*`` and centred inside a
    framed ``wrapstuff`` environment; the texified content follows.
    """

    def __init__(self, texifier: Texifier) -> None:
        """Store the texifier used for metadata and content."""
        self._texifier = texifier

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the TeX for *chunk*: metadata box first, content after."""
        template = br'''\wrapstuffclear
\begin{wrapstuff}[l]
\fbox{
\begin{varwidth}{0.5\textwidth}
 \smash{\parbox[t][0pt]{0pt}{
 \setlength{\fboxrule}{0.2pt}
 \setlength{\fboxsep}{0pt}
 \vspace{-3.4pt}
 \fbox{\hspace{107mm}}
 }\\*}
 \vspace{-1em}
\begin{center}
%s
\end{center}
\end{varwidth}
}
\end{wrapstuff}

\strut

\noindent %s
'''
        # Icon first (when present), then whichever metadata tags exist.
        pieces = []
        if chunk.icon:
            pieces.append(br'\glowicon{%s}' % chunk.icon.encode('UTF-8'))
        for tag in (chunk.character, chunk.screen_name, chunk.author):
            if tag is not None:
                pieces.append(self._texifier.texify(tag))

        header = br'\\*'.join(pieces)
        body = self._texifier.texify(chunk.content)
        return template % (header, body)
f75c1629
SW
167
168
class BesideIconLayout(Layout):
    """Layout that frames the icon next to the metadata, with content below.

    The icon (as a ``\\glowicon`` command, or nothing when the chunk has no
    icon) and a ``\\parbox`` holding the texified character, screen name
    and author share one ``\\fbox``; the texified content follows it.
    """

    def __init__(self, texifier: Texifier) -> None:
        """Store the texifier used for metadata and content."""
        self._texifier = texifier

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the TeX for *chunk*: framed icon and metadata, then content."""
        # Why is \textwidth not the width of the text?
        # Why is the width of the text .765\textwidth?
        template = br'''\noindent\fbox{
%s
\parbox[b]{.765\textwidth}{
\begin{center}
%s
\end{center}
}
}\\*
\vspace{-0.75em}\\*
\noindent %s

\strut

'''
        if chunk.icon:
            icon_tex = br'\glowicon{%s}' % chunk.icon.encode('UTF-8')
        else:
            icon_tex = b''

        present = [tag
                   for tag in (chunk.character, chunk.screen_name, chunk.author)
                   if tag is not None]
        meta_tex = br'\\*'.join([self._texifier.texify(tag) for tag in present])

        content_tex = self._texifier.texify(chunk.content)
        return template % (icon_tex, meta_tex, content_tex)
f75c1629 199 self._texifier.texify(chunk.content))