]> git.scottworley.com Git - paperdoorknob/blob - glowfic.py
Optionally have Thread.__init__ fetch the HTML
[paperdoorknob] / glowfic.py
1 # paperdoorknob: Print glowfic
2 #
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
6
7
8 from dataclasses import dataclass
9 import itertools
10 from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
11
12 from typing import Iterable
13
14 from bs4 import BeautifulSoup
15 from bs4.element import Tag
16
17 from images import ImageStore
18 from spec import Spec
19 from texify import Texifier
20
21
22 def _removeViewFromURL(url: str) -> str:
23 u = urlparse(url)
24 old_qs = parse_qsl(u.query)
25 new_qs = [(k, v) for k, v in old_qs if k != 'view']
26 return urlunparse(u._replace(query=urlencode(new_qs)))
27
28
29 def nonFlatURL(url: str) -> str:
30 return _removeViewFromURL(url)
31
32
33 def flatURL(url: str) -> str:
34 u = urlparse(_removeViewFromURL(url))
35 qs = parse_qsl(u.query) + [('view', 'flat')]
36 return urlunparse(u._replace(query=urlencode(qs)))
37
38
@dataclass(frozen=True)
class Chunk:
    """One piece of a thread: its header metadata and its content element.

    Instances are immutable.  The field notes below describe the values
    that makeChunk in this module stores.
    """

    # Whatever ImageStore.get_image returned for the icon image's src, or
    # None when the chunk has no icon div or no img inside it.
    icon: str | None
    # The 'post-character' div, or None when the chunk has none.
    character: Tag | None
    # The 'post-screenname' div, or None when the chunk has none.
    screen_name: Tag | None
    # The 'post-author' div, or None when the chunk has none.
    author: Tag | None
    # The 'post-content' div; always present.
    content: Tag
46
47 # We avoid the name "post" because the Glowfic community uses the term
48 # inconsistently:
49 # * The Glowfic software sometimes uses "post" to refer to a whole thread
50 # (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
51 # but mostly uses "post" to refer to just the first chunk in a thread
52 # (in the HTML and UI). The non-first chunks are "replies".
53 # * Readers and this software don't need to distinguish first-chunks and
54 # non-first-chunks.
55 # * Humans in the community tend to use "posts" to mean chunks.
56
57
class Thread:
    """A glowfic thread, backed by its parsed HTML document."""

    def __init__(self, thing: BeautifulSoup | Spec) -> None:
        """Wrap an already-parsed document, or fetch and parse one.

        Given a ``Spec``, the flat-view URL derived from ``spec.url`` is
        fetched through ``spec.fetcher``, the result is passed through
        ``spec.htmlfilter`` and parsed with ``html.parser``; progress is
        reported through ``spec.log``.  Any other argument is stored as the
        parsed document unchanged.
        """
        if not isinstance(thing, Spec):
            self._dom = thing
            return

        spec = thing
        spec.log('Fetching HTML...\r')
        raw_html = spec.fetcher.fetch(flatURL(spec.url))
        spec.log('Parsing HTML...\r')
        self._dom = BeautifulSoup(spec.htmlfilter(raw_html), 'html.parser')

    def title(self) -> str | None:
        """Return the stripped text of the ``post-title`` span, if any.

        Returns ``None`` when the lookup does not produce a ``Tag``.
        """
        title_span = self._dom.findChild("span", id="post-title")
        return title_span.text.strip() if isinstance(title_span, Tag) else None

    def chunkDOMs(self) -> Iterable[Tag]:
        """Return the thread's chunk elements.

        The result starts with the first ``post-post`` div found after the
        document's ``body`` element, followed by every ``post-reply`` div in
        the document.  Both lookups run before this method returns; the
        assertions fail if the document has no body, no ``post-post`` div,
        or a reply that is not a ``Tag``.
        """
        body = self._dom.body
        assert body
        first_chunk = body.find_next("div", class_="post-post")
        assert isinstance(first_chunk, Tag)

        replies = self._dom.find_all("div", class_="post-reply")
        assert all(isinstance(reply, Tag) for reply in replies)

        return itertools.chain([first_chunk], replies)
90
91
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a ``Chunk`` from the HTML element of a single chunk.

    The ``post-content`` div is required; its absence fails an assertion
    before anything else is looked up.  The icon and the character,
    screen-name and author divs are optional and become ``None`` when
    missing.  Links inside those three metadata divs lose their ``href``
    attribute, which modifies ``chunk_dom`` in place.
    """

    def find_div(css_class: str) -> Tag | None:
        # Shared lookup: a div of the given class, or None.
        found = chunk_dom.findChild('div', class_=css_class)
        assert found is None or isinstance(found, Tag)
        return found

    def stored_icon() -> str | None:
        # The icon is the img inside the 'post-icon' div; either may be
        # missing, in which case the chunk simply has no icon.
        holder = find_div('post-icon')
        img = None if holder is None else holder.findChild('img')
        if img is None:
            return None
        assert isinstance(img, Tag)
        return image_store.get_image(img.attrs['src'])

    def metadata(css_class: str) -> Tag | None:
        # Return the metadata div with the href removed from every link.
        div = find_div(css_class)
        if div is not None:
            for link in div.findChildren("a"):
                link.attrs.pop("href", None)
        return div

    body = chunk_dom.findChild('div', class_='post-content')
    assert isinstance(body, Tag)

    return Chunk(
        icon=stored_icon(),
        character=metadata('post-character'),
        screen_name=metadata('post-screenname'),
        author=metadata('post-author'),
        content=body,
    )
130
131
def renderChunk(texifier: Texifier, chunk: Chunk) -> bytes:
    r"""Render *chunk* as LaTeX: a ``\glowhead`` call, then the content.

    The result is ``\glowhead{icon}{character}{screen name}{author}``
    immediately followed by the texified content.  A header field that is
    falsy leaves its argument empty; a non-empty icon is UTF-8 encoded and
    wrapped in ``\glowicon{...}``.
    """
    pieces = [br'\glowhead{']
    if chunk.icon:
        pieces.append(br'\glowicon{%s}' % chunk.icon.encode('UTF-8'))

    # Each remaining header argument is opened by closing the previous one.
    for header_field in (chunk.character, chunk.screen_name, chunk.author):
        pieces.append(b'}{')
        if header_field:
            pieces.append(texifier.texify(header_field))

    pieces.append(b'}')
    pieces.append(texifier.texify(chunk.content))
    return b''.join(pieces)
144
145
# LaTeX definition of \glowhead for the content-only layout: the command
# accepts its four arguments and expands to nothing, so the header that
# renderChunk emits before each chunk's content produces no output.
ContentOnlyLayout = br'''
\newcommand{\glowhead}[4]{}
'''
149
150
# LaTeX definition of \glowhead for the below-icon layout.  The four
# arguments are stacked and centered inside a framed varwidth box (at most
# 0.5\textwidth wide) placed in a left-aligned wrapstuff environment.  Each
# of the first three arguments is followed by a line break (\\*) only when
# it is non-empty.
# NOTE(review): the \smash'd zero-size \parbox looks like it draws a thin
# 107mm-wide rule at the top of the box -- confirm the intent.
BelowIconLayout = br'''
\newcommand{\glowhead}[4]{\wrapstuffclear
\begin{wrapstuff}[l]
\fbox{
\begin{varwidth}{0.5\textwidth}
\smash{\parbox[t][0pt]{0pt}{
\setlength{\fboxrule}{0.2pt}
\setlength{\fboxsep}{0pt}
\vspace{-3.4pt}
\fbox{\hspace{107mm}}
}\\*}
\vspace{-1em}
\begin{center}
#1\ifnotempty
{#1}{\\*}#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
\end{varwidth}
}
\end{wrapstuff}

\strut

\noindent}'''
176
177
# LaTeX definition of \glowhead for the beside-icon layout: a framed box
# holding the first argument followed by a bottom-aligned \parbox in which
# the other three arguments are centered.  The second and third are each
# followed by a line break (\\*) only when non-empty.
#
# Why is \textwidth not the width of the text?
# Why is the width of the text .765\textwidth?
BesideIconLayout = br'''
\newcommand{\glowhead}[4]{

\strut

\noindent\fbox{
#1
\parbox[b]{.765\textwidth}{
\begin{center}
#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
}
}\\*
\vspace{-0.75em}\\*
\noindent}'''