]> git.scottworley.com Git - paperdoorknob/blame - glowfic.py
Reify Thread
[paperdoorknob] / glowfic.py
CommitLineData
e6adf6ce
SW
1# paperdoorknob: Print glowfic
2#
3# This program is free software: you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation, version 3.
6
7
aa060d9b 8from dataclasses import dataclass
e6adf6ce 9import itertools
1452f8d3 10from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
e6adf6ce
SW
11
12from typing import Iterable
13
14from bs4 import BeautifulSoup
15from bs4.element import Tag
16
aa060d9b 17from images import ImageStore
d2a41ff4 18from texify import Texifier
aa060d9b
SW
19
20
1452f8d3
SW
21def _removeViewFromURL(url: str) -> str:
22 u = urlparse(url)
23 old_qs = parse_qsl(u.query)
24 new_qs = [(k, v) for k, v in old_qs if k != 'view']
25 return urlunparse(u._replace(query=urlencode(new_qs)))
26
27
28def nonFlatURL(url: str) -> str:
29 return _removeViewFromURL(url)
30
31
32def flatURL(url: str) -> str:
33 u = urlparse(_removeViewFromURL(url))
34 qs = parse_qsl(u.query) + [('view', 'flat')]
35 return urlunparse(u._replace(query=urlencode(qs)))
36
37
aa060d9b
SW
@dataclass(frozen=True)
class Chunk:
    """The pieces of one chunk of a thread, as produced by makeChunk.

    Instances are immutable (frozen dataclass).  Field order matters:
    makeChunk constructs Chunk positionally.
    """
    # Whatever ImageStore.get_image returned for the icon's img src, or
    # None when the chunk has no icon div or no img inside it.
    icon: str | None
    # The 'post-character' div, or None if the chunk has none.
    character: Tag | None
    # The 'post-screenname' div, or None if the chunk has none.
    screen_name: Tag | None
    # The 'post-author' div, or None if the chunk has none.
    author: Tag | None
    # The 'post-content' div; always present.
    content: Tag
45
e6adf6ce
SW
46# We avoid the name "post" because the Glowfic community uses the term
47# inconsistently:
48# * The Glowfic software sometimes uses "post" to refer to a whole thread
aa060d9b
SW
49# (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
50# but mostly uses "post" to refer to just the first chunk in a thread
51# (in the HTML and UI). The non-first chunks are "replies".
e6adf6ce
SW
52# * Readers and this software don't need to distinguish first-chunks and
53# non-first-chunks.
aa060d9b 54# * Humans in the community tend to use "posts" to mean chunks.
e6adf6ce
SW
55
56
94027099
SW
class Thread:
    """A parsed thread page, wrapping its BeautifulSoup document."""

    def __init__(self, html: BeautifulSoup) -> None:
        self._html = html

    def chunkDOMs(self) -> Iterable[Tag]:
        """Return the DOM of every chunk in the thread, first chunk first.

        The first chunk is the first "post-post" div found after the
        document body; the rest are all of the "post-reply" divs, in the
        order find_all returns them.
        """
        body = self._html.body
        assert body
        first_chunk = body.find_next("div", class_="post-post")
        assert isinstance(first_chunk, Tag)

        replies = self._html.find_all("div", class_="post-reply")
        assert all(isinstance(reply, Tag) for reply in replies)

        return itertools.chain([first_chunk], replies)
aa060d9b
SW
76
77
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM of a single chunk.

    Args:
        chunk_dom: The div holding one chunk.
        image_store: Used to resolve the icon's img src through get_image.

    Returns:
        A Chunk holding the icon (or None), the character, screen name
        and author divs (each None when absent), and the content div.

    Raises:
        AssertionError: If the chunk has no 'post-content' div.
        KeyError: If the icon's img element has no src attribute.

    Side effect: the href attribute is deleted, in place, from every link
    inside the character, screen name and author divs, so chunk_dom is
    modified.
    """

    def getIcon() -> str | None:
        # A chunk without an icon div, or with an icon div that holds no
        # img, simply has no icon.
        icon_div = chunk_dom.find('div', class_='post-icon')
        if icon_div is None:
            return None
        assert isinstance(icon_div, Tag)
        icon_img = icon_div.find('img')
        if icon_img is None:
            return None
        assert isinstance(icon_img, Tag)
        return image_store.get_image(icon_img.attrs['src'])

    def getByClass(css_class: str) -> Tag | None:
        tag = chunk_dom.find('div', class_=css_class)
        assert tag is None or isinstance(tag, Tag)
        return tag

    def stripHREF(tag: Tag) -> None:
        # Mutates the tree: the links stay, only their targets are removed.
        for link in tag.find_all("a"):
            if "href" in link.attrs:
                del link.attrs["href"]

    def getMeta(css_class: str) -> Tag | None:
        tag = getByClass(css_class)
        if tag is None:
            return None
        stripHREF(tag)
        return tag

    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(getIcon(),
                 getMeta('post-character'),
                 getMeta('post-screenname'),
                 getMeta('post-author'),
                 content)
d2a41ff4
SW
116
117
1fac41bf
SW
def renderChunk(texifier: Texifier, chunk: Chunk) -> bytes:
    """Render one chunk as TeX source.

    The output is a \\glowhead call with four brace-delimited arguments
    (icon, character, screen name, author), immediately followed by the
    texified content.  A missing icon or metadata tag yields an empty
    argument; a present icon is wrapped in \\glowicon{...}.
    """

    def texifyOrEmpty(tag: Tag | None) -> bytes:
        return texifier.texify(tag) if tag else b''

    if chunk.icon:
        icon = br'\glowicon{%s}' % chunk.icon.encode('UTF-8')
    else:
        icon = b''

    # Texify in the same order the arguments appear in the output:
    # character, screen name, author, then the content.
    character = texifyOrEmpty(chunk.character)
    screen_name = texifyOrEmpty(chunk.screen_name)
    author = texifyOrEmpty(chunk.author)
    content = texifier.texify(chunk.content)

    head = br'\glowhead{' + b'}{'.join((icon, character, screen_name, author)) + b'}'
    return head + content
d2a41ff4 130
d2a41ff4 131
1fac41bf
SW
# TeX preamble fragment: defines \glowhead as a four-argument command that
# expands to nothing, so the header renderChunk emits (icon, character,
# screen name, author) is discarded and only the chunk content remains.
ContentOnlyLayout = br'''
\newcommand{\glowhead}[4]{}
'''
d2a41ff4 135
d2a41ff4 136
1fac41bf
SW
# TeX preamble fragment: defines \glowhead so that its four arguments
# (icon, character, screen name, author, in the order renderChunk emits
# them) are stacked in one centered, framed box placed in a left-side
# wrapstuff environment.  Each of the first three arguments is followed by
# a line break only when \ifnotempty finds it non-empty.
# NOTE(review): \ifnotempty, wrapstuff and varwidth are not defined in this
# file; presumably the document preamble provides them -- confirm.
BelowIconLayout = br'''
\newcommand{\glowhead}[4]{\wrapstuffclear
\begin{wrapstuff}[l]
\fbox{
\begin{varwidth}{0.5\textwidth}
  \smash{\parbox[t][0pt]{0pt}{
    \setlength{\fboxrule}{0.2pt}
    \setlength{\fboxsep}{0pt}
    \vspace{-3.4pt}
    \fbox{\hspace{107mm}}
  }\\*}
  \vspace{-1em}
\begin{center}
#1\ifnotempty
{#1}{\\*}#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
\end{varwidth}
}
\end{wrapstuff}

\strut

\noindent}'''
f75c1629
SW
162
163
1fac41bf
SW
# TeX preamble fragment: defines \glowhead so that the first argument (the
# icon, as emitted by renderChunk) sits in a framed box next to a
# bottom-aligned parbox holding the other three arguments (character,
# screen name, author), centered.  Arguments two and three are followed by
# a line break only when \ifnotempty finds them non-empty.
# NOTE(review): \ifnotempty is not defined in this file; presumably the
# document preamble provides it -- confirm.
#
# Why is \textwidth not the width of the text?
# Why is the width of the text .765\textwidth?
BesideIconLayout = br'''
\newcommand{\glowhead}[4]{

\strut

\noindent\fbox{
#1
\parbox[b]{.765\textwidth}{
\begin{center}
#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
}
}\\*
\vspace{-0.75em}\\*
\noindent}'''