]> git.scottworley.com Git - paperdoorknob/blame - glowfic.py
Always have Thread.__init__ fetch the HTML
[paperdoorknob] / glowfic.py
CommitLineData
e6adf6ce
SW
1# paperdoorknob: Print glowfic
2#
3# This program is free software: you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation, version 3.
6
7
aa060d9b 8from dataclasses import dataclass
e6adf6ce 9import itertools
1452f8d3 10from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
e6adf6ce
SW
11
12from typing import Iterable
13
14from bs4 import BeautifulSoup
15from bs4.element import Tag
16
aa060d9b 17from images import ImageStore
70adfbff 18from spec import Spec
d2a41ff4 19from texify import Texifier
aa060d9b
SW
20
21
1452f8d3
SW
def _removeViewFromURL(url: str) -> str:
    """Return *url* with every ``view`` query parameter removed.

    All other query parameters are kept in their original order, including
    parameters whose value is empty (``?page=``).  The query string is
    re-encoded with ``urlencode``; the rest of the URL is left alone.
    """
    u = urlparse(url)
    # parse_qsl discards "key=" pairs unless told otherwise, which would
    # silently strip unrelated parameters while we only mean to drop 'view'.
    old_qs = parse_qsl(u.query, keep_blank_values=True)
    new_qs = [(k, v) for k, v in old_qs if k != 'view']
    return urlunparse(u._replace(query=urlencode(new_qs)))


def nonFlatURL(url: str) -> str:
    """Return *url* without any ``view`` query parameter."""
    return _removeViewFromURL(url)


def flatURL(url: str) -> str:
    """Return *url* with its ``view`` query parameter set to ``flat``.

    Any existing ``view`` parameter is dropped first, then ``view=flat`` is
    appended after the remaining parameters.
    """
    u = urlparse(_removeViewFromURL(url))
    # Same keep_blank_values reasoning as in _removeViewFromURL: re-parsing
    # must not lose empty-valued parameters that were just preserved.
    qs = parse_qsl(u.query, keep_blank_values=True) + [('view', 'flat')]
    return urlunparse(u._replace(query=urlencode(qs)))
37
38
aa060d9b
SW
@dataclass(frozen=True)
class Chunk:
    """One chunk of a thread: its icon, its header metadata, and its body.

    Instances are immutable (``frozen=True``).  The ``Tag`` fields hold the
    DOM elements that makeChunk located inside the chunk.
    """

    # Value returned by ImageStore.get_image for the chunk's icon, or None
    # when the chunk has no icon image.
    icon: str | None

    # The 'post-character' div, or None when the chunk has none.
    character: Tag | None

    # The 'post-screenname' div, or None when the chunk has none.
    screen_name: Tag | None

    # The 'post-author' div, or None when the chunk has none.
    author: Tag | None

    # The 'post-content' div; always present.
    content: Tag
46
e6adf6ce
SW
47# We avoid the name "post" because the Glowfic community uses the term
48# inconsistently:
49# * The Glowfic software sometimes uses "post" to refer to a whole thread
aa060d9b
SW
50# (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
51# but mostly uses "post" to refer to just the first chunk in a thread
52# (in the HTML and UI). The non-first chunks are "replies".
e6adf6ce
SW
53# * Readers and this software don't need to distinguish first-chunks and
54# non-first-chunks.
aa060d9b 55# * Humans in the community tend to use "posts" to mean chunks.
e6adf6ce
SW
56
57
94027099
SW
class Thread:
    """The parsed HTML of a single thread, fetched in its flat view."""

    def __init__(self, spec: Spec) -> None:
        """Fetch and parse the thread that ``spec.url`` points at.

        The flat-view form of the URL is requested through ``spec.fetcher``,
        the response is passed through ``spec.htmlfilter``, and the result
        is parsed with BeautifulSoup's ``html.parser``.  Progress messages
        go to ``spec.log``.
        """
        spec.log('Fetching HTML...\r')
        html = spec.fetcher.fetch(flatURL(spec.url))
        spec.log('Parsing HTML...\r')
        self._dom = BeautifulSoup(spec.htmlfilter(html), 'html.parser')

    def title(self) -> str | None:
        """Return the stripped text of ``span#post-title``, or None if absent."""
        # find() rather than the legacy findChild alias, matching the
        # find_all / find_next calls used by chunkDOMs below.
        span = self._dom.find("span", id="post-title")
        if not isinstance(span, Tag):
            return None
        return span.text.strip()

    def chunkDOMs(self) -> Iterable[Tag]:
        """Return the DOM of every chunk: the first chunk, then the replies.

        The first chunk is the first ``div.post-post`` found after
        ``<body>``; the replies are all ``div.post-reply`` elements.

        Raises:
            AssertionError: if the document has no ``<body>`` or no
                ``div.post-post``.
        """
        def first_chunk() -> Tag:
            body = self._dom.body
            assert body
            post = body.find_next("div", class_="post-post")
            assert isinstance(post, Tag)
            return post

        def replies() -> Iterable[Tag]:
            found = self._dom.find_all("div", class_="post-reply")
            assert all(isinstance(r, Tag) for r in found)
            return found

        return itertools.chain([first_chunk()], replies())
aa060d9b
SW
86
87
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM of one chunk.

    Args:
        chunk_dom: The chunk's element, as produced by Thread.chunkDOMs.
        image_store: Used to resolve the icon's ``src`` via ``get_image``.

    Returns:
        A Chunk whose icon/character/screen_name/author are None when the
        corresponding element is missing from ``chunk_dom``.

    Raises:
        AssertionError: if ``chunk_dom`` has no ``div.post-content``.

    Note:
        ``chunk_dom`` is modified: every ``<a>`` inside the character,
        screen-name and author divs has its ``href`` attribute removed.
    """

    def getIcon() -> str | None:
        # Icon is the first <img> inside div.post-icon; either may be absent.
        icon_div = chunk_dom.find('div', class_='post-icon')
        if icon_div is None:
            return None
        assert isinstance(icon_div, Tag)
        icon_img = icon_div.find('img')
        if icon_img is None:
            return None
        assert isinstance(icon_img, Tag)
        return image_store.get_image(icon_img.attrs['src'])

    def getMeta(css_class: str) -> Tag | None:
        # Locate the metadata div and strip link targets from it in place.
        tag = chunk_dom.find('div', class_=css_class)
        assert tag is None or isinstance(tag, Tag)
        if tag is None:
            return None
        for link in tag.find_all('a'):
            if 'href' in link.attrs:
                del link.attrs['href']
        return tag

    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(getIcon(),
                 getMeta('post-character'),
                 getMeta('post-screenname'),
                 getMeta('post-author'),
                 content)
d2a41ff4
SW
126
127
1fac41bf
SW
def renderChunk(texifier: Texifier, chunk: Chunk) -> bytes:
    r"""Render *chunk* as LaTeX: a ``\glowhead`` call followed by the body.

    The output is ``\glowhead{icon}{character}{screen_name}{author}``
    immediately followed by the texified content.  A missing icon or
    metadata element yields an empty argument.
    """
    def texOrEmpty(tag: Tag | None) -> bytes:
        if tag:
            return texifier.texify(tag)
        return b''

    if chunk.icon:
        icon = br'\glowicon{%s}' % chunk.icon.encode('UTF-8')
    else:
        icon = b''

    # Evaluated in header order so texify is called in the same sequence
    # as the arguments appear in the output.
    character = texOrEmpty(chunk.character)
    screen_name = texOrEmpty(chunk.screen_name)
    author = texOrEmpty(chunk.author)
    body = texifier.texify(chunk.content)

    head_args = b'}{'.join((icon, character, screen_name, author))
    return br'\glowhead{' + head_args + b'}' + body
d2a41ff4 140
d2a41ff4 141
1fac41bf
SW
# LaTeX definition of \glowhead for the content-only layout: the command
# takes its four arguments and typesets nothing.
ContentOnlyLayout: bytes = br'''
\newcommand{\glowhead}[4]{}
'''
d2a41ff4 145
d2a41ff4 146
1fac41bf
SW
# LaTeX definition of \glowhead for the below-icon layout: the four
# arguments are centred inside a framed varwidth box placed in a
# left-aligned wrapstuff environment.  Each of #1..#3 is followed by a
# line break only when it is non-empty (\ifnotempty).
BelowIconLayout: bytes = br'''
\newcommand{\glowhead}[4]{\wrapstuffclear
\begin{wrapstuff}[l]
\fbox{
\begin{varwidth}{0.5\textwidth}
 \smash{\parbox[t][0pt]{0pt}{
 \setlength{\fboxrule}{0.2pt}
 \setlength{\fboxsep}{0pt}
 \vspace{-3.4pt}
 \fbox{\hspace{107mm}}
 }\\*}
 \vspace{-1em}
\begin{center}
#1\ifnotempty
{#1}{\\*}#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
\end{varwidth}
}
\end{wrapstuff}

\strut

\noindent}'''
f75c1629
SW
172
173
1fac41bf
SW
# LaTeX definition of \glowhead for the beside-icon layout: #1 is placed
# inside a frame next to a bottom-aligned \parbox that centres #2..#4,
# with a line break after #2 and #3 only when they are non-empty.
#
# Why is \textwidth not the width of the text?
# Why is the width of the text .765\textwidth?
BesideIconLayout: bytes = br'''
\newcommand{\glowhead}[4]{

\strut

\noindent\fbox{
#1
\parbox[b]{.765\textwidth}{
\begin{center}
#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
}
}\\*
\vspace{-0.75em}\\*
\noindent}'''