]> git.scottworley.com Git - paperdoorknob/blame - glowfic.py
Optionally have Thread.__init__ fetch the HTML
[paperdoorknob] / glowfic.py
CommitLineData
e6adf6ce
SW
1# paperdoorknob: Print glowfic
2#
3# This program is free software: you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation, version 3.
6
7
aa060d9b 8from dataclasses import dataclass
e6adf6ce 9import itertools
1452f8d3 10from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
e6adf6ce
SW
11
12from typing import Iterable
13
14from bs4 import BeautifulSoup
15from bs4.element import Tag
16
aa060d9b 17from images import ImageStore
70adfbff 18from spec import Spec
d2a41ff4 19from texify import Texifier
aa060d9b
SW
20
21
1452f8d3
SW
def _removeViewFromURL(url: str) -> str:
    """Return url with every 'view' query parameter removed.

    All other query parameters are kept in their original order,
    including ones whose value is empty.  The query string is re-encoded
    with urlencode; the rest of the URL is left untouched.
    """
    u = urlparse(url)
    # keep_blank_values: parse_qsl otherwise silently drops parameters
    # such as "page=", which would remove more than just 'view'.
    kept = [(k, v)
            for k, v in parse_qsl(u.query, keep_blank_values=True)
            if k != 'view']
    return urlunparse(u._replace(query=urlencode(kept)))


def nonFlatURL(url: str) -> str:
    """Return url without any 'view' query parameter."""
    return _removeViewFromURL(url)


def flatURL(url: str) -> str:
    """Return url with its 'view' query parameter forced to 'flat'.

    Any existing 'view' parameter is dropped first, then 'view=flat' is
    appended after the remaining parameters.
    """
    u = urlparse(_removeViewFromURL(url))
    qs = parse_qsl(u.query, keep_blank_values=True) + [('view', 'flat')]
    return urlunparse(u._replace(query=urlencode(qs)))
37
38
aa060d9b
SW
@dataclass(frozen=True)
class Chunk:
    """One chunk of a thread: header metadata plus the content element.

    Instances are immutable.  As built by makeChunk, each optional field
    is None when the chunk's HTML has no corresponding element.
    """
    # Value returned by ImageStore.get_image for the icon <img>'s src.
    icon: str | None
    # The chunk's 'post-character' div.
    character: Tag | None
    # The chunk's 'post-screenname' div.
    screen_name: Tag | None
    # The chunk's 'post-author' div.
    author: Tag | None
    # The chunk's 'post-content' div; always present.
    content: Tag
46
e6adf6ce
SW
47# We avoid the name "post" because the Glowfic community uses the term
48# inconsistently:
49# * The Glowfic software sometimes uses "post" to refer to a whole thread
aa060d9b
SW
50# (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
51# but mostly uses "post" to refer to just the first chunk in a thread
52# (in the HTML and UI). The non-first chunks are "replies".
e6adf6ce
SW
53# * Readers and this software don't need to distinguish first-chunks and
54# non-first-chunks.
aa060d9b 55# * Humans in the community tend to use "posts" to mean chunks.
e6adf6ce
SW
56
57
94027099
SW
class Thread:
    """A thread held as a parsed HTML document.

    Gives access to the thread title and to the DOM element of each
    chunk (the opening post followed by every reply).
    """

    def __init__(self, thing: BeautifulSoup | Spec) -> None:
        """Wrap an already-parsed document, or fetch and parse one.

        Args:
            thing: A parsed document, which is used as-is, or a Spec.
                Given a Spec, the flat-view form of spec.url is fetched
                through spec.fetcher, passed through spec.htmlfilter and
                parsed with 'html.parser'.  Progress messages go to
                spec.log.
        """
        if isinstance(thing, Spec):
            spec = thing
            spec.log('Fetching HTML...\r')
            html = spec.fetcher.fetch(flatURL(spec.url))
            spec.log('Parsing HTML...\r')
            self._dom = BeautifulSoup(spec.htmlfilter(html), 'html.parser')
        else:
            self._dom = thing

    def title(self) -> str | None:
        """Return the stripped text of span#post-title, or None if absent."""
        # find() is the current name for the deprecated findChild() alias.
        span = self._dom.find("span", id="post-title")
        if not isinstance(span, Tag):
            return None
        return span.text.strip()

    def chunkDOMs(self) -> Iterable[Tag]:
        """Return the element of every chunk, opening post first.

        The opening post is the first div.post-post found after <body>;
        the replies are all div.post-reply elements in the document.

        Raises:
            AssertionError: If the document has no <body>, no
                div.post-post, or a reply match that is not a Tag.
        """
        def text() -> Tag:
            body = self._dom.body
            assert body
            first = body.find_next("div", class_="post-post")
            assert isinstance(first, Tag)
            return first

        def the_replies() -> Iterable[Tag]:
            rs = self._dom.find_all("div", class_="post-reply")
            assert all(isinstance(r, Tag) for r in rs)
            return rs

        return itertools.chain([text()], the_replies())
aa060d9b
SW
90
91
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM element of a single chunk.

    Args:
        chunk_dom: Element containing the chunk's 'post-icon',
            'post-character', 'post-screenname', 'post-author' and
            'post-content' divs.  Only 'post-content' is required.
        image_store: Used to resolve the icon <img>'s src via get_image.

    Returns:
        A Chunk whose optional fields are None for missing elements.

    Raises:
        AssertionError: If there is no div.post-content.
        KeyError: If the icon <img> has no src attribute.

    Side effects:
        The href attribute is deleted in place from every <a> inside the
        character, screen-name and author divs of chunk_dom.
    """

    def getIcon() -> str | None:
        icon_div = chunk_dom.find('div', class_='post-icon')
        if icon_div is None:
            return None
        assert isinstance(icon_div, Tag)
        icon_img = icon_div.find('img')
        if icon_img is None:
            return None
        assert isinstance(icon_img, Tag)
        return image_store.get_image(icon_img.attrs['src'])

    def getByClass(css_class: str) -> Tag | None:
        tag = chunk_dom.find('div', class_=css_class)
        assert tag is None or isinstance(tag, Tag)
        return tag

    def stripHREF(tag: Tag) -> None:
        # Mutates the DOM: links inside tag lose their target.
        for c in tag.find_all("a"):
            if "href" in c.attrs:
                del c.attrs["href"]

    def getMeta(css_class: str) -> Tag | None:
        tag = getByClass(css_class)
        if tag is None:
            return None
        stripHREF(tag)
        return tag

    # Checked before any metadata is touched, so a chunk without content
    # fails before its links are modified.
    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(getIcon(),
                 getMeta('post-character'),
                 getMeta('post-screenname'),
                 getMeta('post-author'),
                 content)
d2a41ff4
SW
130
131
1fac41bf
SW
def renderChunk(texifier: Texifier, chunk: Chunk) -> bytes:
    r"""Render one chunk as LaTeX: a \glowhead call, then the content.

    The output is \glowhead{icon}{character}{screen name}{author}
    followed by the texified content.  A header argument is left empty
    when the corresponding Chunk field is falsy; a present icon is
    wrapped as \glowicon{...} after UTF-8 encoding.
    """
    def texOrEmpty(tag: Tag | None) -> bytes:
        return texifier.texify(tag) if tag else b''

    if chunk.icon:
        icon = br'\glowicon{%s}' % chunk.icon.encode('UTF-8')
    else:
        icon = b''

    # Fields are texified in header order, then the content, so the
    # texifier sees them in the same sequence as they appear in output.
    header = b'}{'.join([
        icon,
        texOrEmpty(chunk.character),
        texOrEmpty(chunk.screen_name),
        texOrEmpty(chunk.author),
    ])
    return br'\glowhead{' + header + b'}' + texifier.texify(chunk.content)
d2a41ff4 144
d2a41ff4 145
1fac41bf
SW
# LaTeX definition of \glowhead, the four-argument header macro emitted by
# renderChunk, that expands to nothing: all four header arguments are
# discarded.
ContentOnlyLayout = br'''
\newcommand{\glowhead}[4]{}
'''
d2a41ff4 149
d2a41ff4 150
1fac41bf
SW
# LaTeX definition of \glowhead (arguments in the order renderChunk emits
# them: icon, character, screen name, author).  The four arguments are
# stacked in one centred column inside a framed varwidth box, which sits
# in a left-side wrapstuff block; each non-empty argument among the first
# three is followed by a \\* line break.
# NOTE(review): the \smash{\parbox...} part looks like it draws a thin
# fixed-width (107mm) rule at the top of the box -- confirm intent.
BelowIconLayout = br'''
\newcommand{\glowhead}[4]{\wrapstuffclear
\begin{wrapstuff}[l]
\fbox{
\begin{varwidth}{0.5\textwidth}
 \smash{\parbox[t][0pt]{0pt}{
 \setlength{\fboxrule}{0.2pt}
 \setlength{\fboxsep}{0pt}
 \vspace{-3.4pt}
 \fbox{\hspace{107mm}}
 }\\*}
 \vspace{-1em}
\begin{center}
#1\ifnotempty
{#1}{\\*}#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
\end{varwidth}
}
\end{wrapstuff}

\strut

\noindent}'''
f75c1629
SW
176
177
1fac41bf
SW
# LaTeX definition of \glowhead (arguments in the order renderChunk emits
# them: icon, character, screen name, author).  The first argument is
# placed directly in a framed box, followed by a bottom-aligned \parbox
# holding the other three in a centred column; each non-empty argument
# among the second and third is followed by a \\* line break.
#
# Why is \textwidth not the width of the text?
# Why is the width of the text .765\textwidth?
BesideIconLayout = br'''
\newcommand{\glowhead}[4]{

\strut

\noindent\fbox{
#1
\parbox[b]{.765\textwidth}{
\begin{center}
#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
}
}\\*
\vspace{-0.75em}\\*
\noindent}'''