]> git.scottworley.com Git - paperdoorknob/blame - glowfic.py
Learning TeX: Do Layouts with TeX macros
[paperdoorknob] / glowfic.py
CommitLineData
e6adf6ce
SW
1# paperdoorknob: Print glowfic
2#
3# This program is free software: you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation, version 3.
6
7
aa060d9b 8from dataclasses import dataclass
e6adf6ce 9import itertools
1452f8d3 10from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
e6adf6ce
SW
11
12from typing import Iterable
13
14from bs4 import BeautifulSoup
15from bs4.element import Tag
16
aa060d9b 17from images import ImageStore
d2a41ff4 18from texify import Texifier
aa060d9b
SW
19
20
1452f8d3
SW
def _removeViewFromURL(url: str) -> str:
    """Return `url` with every 'view' query parameter removed.

    All other query parameters are kept, in their original order.  Blank
    values (e.g. "page=") are preserved: parse_qsl discards them unless
    keep_blank_values is set, which would silently drop parameters that
    have nothing to do with 'view'.
    """
    u = urlparse(url)
    kept = [(k, v)
            for k, v in parse_qsl(u.query, keep_blank_values=True)
            if k != 'view']
    return urlunparse(u._replace(query=urlencode(kept)))


def nonFlatURL(url: str) -> str:
    """Return `url` without any 'view' query parameter."""
    return _removeViewFromURL(url)


def flatURL(url: str) -> str:
    """Return `url` with its 'view' query parameter set to 'flat'.

    Any existing 'view' parameters are removed first, then a single
    view=flat pair is appended after the remaining parameters.
    """
    u = urlparse(_removeViewFromURL(url))
    # keep_blank_values for the same reason as in _removeViewFromURL:
    # re-parsing must not lose parameters with empty values.
    qs = parse_qsl(u.query, keep_blank_values=True) + [('view', 'flat')]
    return urlunparse(u._replace(query=urlencode(qs)))
36
37
aa060d9b
SW
@dataclass(frozen=True)
class Chunk:
    """One piece of a thread (the first post or a reply), as built by makeChunk.

    Instances are immutable.  The header fields are None when the
    corresponding element is absent from the chunk's DOM.
    """
    # Value returned by ImageStore.get_image for the icon's <img> src
    # (presumably a reference to the stored image -- confirm in images.py).
    icon: str | None
    # The 'post-character' div, if present.
    character: Tag | None
    # The 'post-screenname' div, if present.
    screen_name: Tag | None
    # The 'post-author' div, if present.
    author: Tag | None
    # The 'post-content' div; always present.
    content: Tag
45
e6adf6ce
SW
46# We avoid the name "post" because the Glowfic community uses the term
47# inconsistently:
48# * The Glowfic software sometimes uses "post" to refer to a whole thread
aa060d9b
SW
49# (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
50# but mostly uses "post" to refer to just the first chunk in a thread
51# (in the HTML and UI). The non-first chunks are "replies".
e6adf6ce
SW
52# * Readers and this software don't need to distinguish first-chunks and
53# non-first-chunks.
aa060d9b 54# * Humans in the community tend to use "posts" to mean chunks.
e6adf6ce
SW
55
56
def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
    """Yield the DOM of every chunk in a thread page, in document order.

    The first item is the 'post-post' div found after <body>; it is
    followed by every 'post-reply' div in the document.

    Raises AssertionError if the document has no <body>, no 'post-post'
    div, or if a reply match is not a Tag.
    """
    body = html.body
    assert body
    first_chunk = body.find_next("div", class_="post-post")
    assert isinstance(first_chunk, Tag)

    replies = html.find_all("div", class_="post-reply")
    assert all(isinstance(reply, Tag) for reply in replies)

    return itertools.chain([first_chunk], replies)
aa060d9b
SW
71
72
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM of a single post or reply.

    chunk_dom: the element for one chunk (as produced by chunkDOMs).
    image_store: used to resolve the icon's <img> src via get_image().

    The character, screen-name and author elements are modified in place:
    the href attribute is deleted from every <a> inside them.  The content
    element is returned untouched.

    Raises AssertionError if chunk_dom has no 'post-content' div.
    """

    def getIcon() -> str | None:
        # The icon is optional: either the wrapper div or the <img>
        # inside it may be missing.
        icon_div = chunk_dom.find('div', class_='post-icon')
        if icon_div is None:
            return None
        assert isinstance(icon_div, Tag)
        icon_img = icon_div.find('img')
        if icon_img is None:
            return None
        assert isinstance(icon_img, Tag)
        return image_store.get_image(icon_img.attrs['src'])

    def getByClass(css_class: str) -> Tag | None:
        tag = chunk_dom.find('div', class_=css_class)
        assert tag is None or isinstance(tag, Tag)
        return tag

    def stripHREF(tag: Tag) -> None:
        # Mutates the DOM: links in header metadata lose their targets.
        for anchor in tag.find_all("a"):
            if "href" in anchor.attrs:
                del anchor.attrs["href"]

    def getMeta(css_class: str) -> Tag | None:
        tag = getByClass(css_class)
        if tag is None:
            return None
        stripHREF(tag)
        return tag

    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(getIcon(),
                 getMeta('post-character'),
                 getMeta('post-screenname'),
                 getMeta('post-author'),
                 content)
d2a41ff4
SW
111
112
1fac41bf
SW
def renderChunk(texifier: Texifier, chunk: Chunk) -> bytes:
    """Render one chunk as TeX: a four-argument glowhead call, then the body.

    The glowhead arguments are, in order: the icon (wrapped in a glowicon
    call, UTF-8 encoded), the character, the screen name and the author.
    A missing field yields an empty argument.  The texified content
    follows immediately after the closing brace.
    """

    def texifyOptional(tag: Tag | None) -> bytes:
        return texifier.texify(tag) if tag else b''

    if chunk.icon:
        icon = br'\glowicon{%s}' % chunk.icon.encode('UTF-8')
    else:
        icon = b''

    # Texify in the same order the pieces appear in the output.
    character = texifyOptional(chunk.character)
    screen_name = texifyOptional(chunk.screen_name)
    author = texifyOptional(chunk.author)
    content = texifier.texify(chunk.content)

    pieces = [
        br'\glowhead{', icon,
        b'}{', character,
        b'}{', screen_name,
        b'}{', author,
        b'}', content,
    ]
    return b''.join(pieces)
d2a41ff4 125
d2a41ff4 126
1fac41bf
SW
# TeX layout: defines \glowhead as a four-argument macro that expands to
# nothing, so the header emitted by renderChunk (icon, character, screen
# name, author) is discarded and only the chunk content is typeset.
127ContentOnlyLayout = br'''
128\newcommand{\glowhead}[4]{}
129'''
d2a41ff4 130
d2a41ff4 131
1fac41bf
SW
# TeX layout: \glowhead typesets its four arguments centered in an \fbox
# inside a left-placed ("l") wrapstuff environment.  Each of #1-#3 is
# followed by a \\* line break only when it is non-empty (\ifnotempty), so
# the character, screen name and author stack below the icon (#1).
132BelowIconLayout = br'''
133\newcommand{\glowhead}[4]{\wrapstuffclear
67612898
SW
134\begin{wrapstuff}[l]
135\fbox{
136\begin{varwidth}{0.5\textwidth}
137 \smash{\parbox[t][0pt]{0pt}{
138 \setlength{\fboxrule}{0.2pt}
139 \setlength{\fboxsep}{0pt}
140 \vspace{-3.4pt}
141 \fbox{\hspace{107mm}}
142 }\\*}
143 \vspace{-1em}
144\begin{center}
1fac41bf
SW
145#1\ifnotempty
146{#1}{\\*}#2\ifnotempty
147{#2}{\\*}#3\ifnotempty
148{#3}{\\*}#4
67612898
SW
149\end{center}
150\end{varwidth}
23dabdf5 151}
67612898 152\end{wrapstuff}
23dabdf5 153
67612898 154\strut
16385131 155
1fac41bf 156\noindent}'''
f75c1629
SW
157
158
1fac41bf
SW
159# Why is \textwidth not the width of the text?
160# Why is the width of the text .765\textwidth?
# TeX layout: \glowhead puts the icon (#1) and a bottom-aligned \parbox of
# width .765\textwidth side by side in a single \fbox.  The \parbox holds
# #2-#4 centered, with a \\* line break after #2 and #3 only when they are
# non-empty (\ifnotempty).
161BesideIconLayout = br'''
162\newcommand{\glowhead}[4]{
f75c1629 163
1fac41bf 164\strut
f75c1629 165
1fac41bf
SW
166\noindent\fbox{
167#1
67612898
SW
168\parbox[b]{.765\textwidth}{
169\begin{center}
1fac41bf
SW
170#2\ifnotempty
171{#2}{\\*}#3\ifnotempty
172{#3}{\\*}#4
67612898 173\end{center}
f75c1629 174}
67612898
SW
175}\\*
176\vspace{-0.75em}\\*
1fac41bf 177\noindent}'''