]> git.scottworley.com Git - paperdoorknob/blame - glowfic.py
Move get_title() to Thread
[paperdoorknob] / glowfic.py
CommitLineData
e6adf6ce
SW
1# paperdoorknob: Print glowfic
2#
3# This program is free software: you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation, version 3.
6
7
aa060d9b 8from dataclasses import dataclass
e6adf6ce 9import itertools
1452f8d3 10from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
e6adf6ce
SW
11
12from typing import Iterable
13
14from bs4 import BeautifulSoup
15from bs4.element import Tag
16
aa060d9b 17from images import ImageStore
d2a41ff4 18from texify import Texifier
aa060d9b
SW
19
20
1452f8d3
SW
21def _removeViewFromURL(url: str) -> str:
22 u = urlparse(url)
23 old_qs = parse_qsl(u.query)
24 new_qs = [(k, v) for k, v in old_qs if k != 'view']
25 return urlunparse(u._replace(query=urlencode(new_qs)))
26
27
def nonFlatURL(url: str) -> str:
    """Return the non-flat form of *url*.

    This is the same URL with any ``view`` query parameter removed.
    """
    without_view = _removeViewFromURL(url)
    return without_view
30
31
def flatURL(url: str) -> str:
    """Return the flat-view form of *url*.

    Any existing ``view`` query parameter is removed, then a single
    ``view=flat`` is appended after the remaining parameters.
    """
    u = urlparse(_removeViewFromURL(url))
    # keep_blank_values=True: by default parse_qsl drops "key=" pairs, which
    # would silently remove unrelated parameters when the query is rebuilt.
    qs = parse_qsl(u.query, keep_blank_values=True) + [('view', 'flat')]
    return urlunparse(u._replace(query=urlencode(qs)))
36
37
aa060d9b
SW
@dataclass(frozen=True)
class Chunk:
    """Immutable record of the pieces extracted from one chunk of a thread.

    makeChunk() fills the optional fields with None when the corresponding
    element is absent from the chunk's DOM.
    """

    # Whatever ImageStore.get_image() returned for the icon's <img src>.
    icon: str | None

    # The 'post-character', 'post-screenname' and 'post-author' <div>s.
    character: Tag | None
    screen_name: Tag | None
    author: Tag | None

    # The 'post-content' <div>; always present.
    content: Tag
45
e6adf6ce
SW
46# We avoid the name "post" because the Glowfic community uses the term
47# inconsistently:
48# * The Glowfic software sometimes uses "post" to refer to a whole thread
aa060d9b
SW
49# (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
50# but mostly uses "post" to refer to just the first chunk in a thread
51# (in the HTML and UI). The non-first chunks are "replies".
e6adf6ce
SW
52# * Readers and this software don't need to distinguish first-chunks and
53# non-first-chunks.
aa060d9b 54# * Humans in the community tend to use "posts" to mean chunks.
e6adf6ce
SW
55
56
94027099
SW
class Thread:
    """A Glowfic thread, backed by the parsed HTML of its page."""

    def __init__(self, dom: BeautifulSoup) -> None:
        """Wrap *dom*, the parsed page; it is stored, not copied."""
        self._dom = dom

    def title(self) -> str | None:
        """Return the stripped text of the ``<span id="post-title">``.

        Returns None when the page has no such element.
        """
        # find() is the supported spelling of the legacy findChild() alias.
        span = self._dom.find("span", id="post-title")
        if not isinstance(span, Tag):
            return None
        return span.text.strip()

    def chunkDOMs(self) -> Iterable[Tag]:
        """Return the DOM of every chunk in the thread.

        The result yields the first ``div.post-post`` found after
        ``<body>``, followed by every ``div.post-reply`` in the document.

        Raises AssertionError if the page has no ``<body>`` or no
        ``div.post-post``.  These checks are plain ``assert`` statements,
        so they are skipped when Python runs with ``-O``.
        """
        body = self._dom.body
        assert body
        first_chunk = body.find_next("div", class_="post-post")
        assert isinstance(first_chunk, Tag)

        replies = self._dom.find_all("div", class_="post-reply")
        assert all(isinstance(r, Tag) for r in replies)

        return itertools.chain([first_chunk], replies)
aa060d9b
SW
82
83
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM of a single chunk.

    chunk_dom:   the element containing the chunk's 'post-icon',
                 'post-character', 'post-screenname', 'post-author' and
                 'post-content' <div>s.  Only 'post-content' is required.
    image_store: its get_image() is called with the icon's <img src>
                 value, and whatever it returns becomes Chunk.icon.

    Side effect: every <a> inside the character, screen-name and author
    <div>s has its href attribute deleted in place, so chunk_dom itself is
    modified.

    Raises AssertionError if there is no 'post-content' <div>, and KeyError
    if the icon <img> has no src attribute.
    """

    def getIcon() -> str | None:
        # find()/find_all() replace the legacy findChild()/findChildren()
        # aliases; they are the same searches under their supported names.
        icon_div = chunk_dom.find('div', class_='post-icon')
        if icon_div is None:
            return None
        assert isinstance(icon_div, Tag)
        icon_img = icon_div.find('img')
        if icon_img is None:
            return None
        assert isinstance(icon_img, Tag)
        return image_store.get_image(icon_img.attrs['src'])

    def getByClass(css_class: str) -> Tag | None:
        tag = chunk_dom.find('div', class_=css_class)
        assert tag is None or isinstance(tag, Tag)
        return tag

    def stripHREF(tag: Tag) -> None:
        # Delete the link targets but leave the <a> elements and their
        # text in place.
        for c in tag.find_all("a"):
            if "href" in c.attrs:
                del c.attrs["href"]

    def getMeta(css_class: str) -> Tag | None:
        tag = getByClass(css_class)
        if tag is None:
            return None
        stripHREF(tag)
        return tag

    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(getIcon(),
                 getMeta('post-character'),
                 getMeta('post-screenname'),
                 getMeta('post-author'),
                 content)
d2a41ff4
SW
122
123
1fac41bf
SW
def renderChunk(texifier: Texifier, chunk: Chunk) -> bytes:
    r"""Render *chunk* as LaTeX: a \glowhead call followed by the content.

    The output is ``\glowhead{ICON}{CHARACTER}{SCREEN_NAME}{AUTHOR}``
    immediately followed by the texified content.  ICON is
    ``\glowicon{<icon>}`` with the icon string encoded as UTF-8.  The other
    three arguments are the texified metadata tags.  Any argument whose
    field is falsy is left empty.
    """
    if chunk.icon:
        icon_arg = br'\glowicon{%s}' % chunk.icon.encode('UTF-8')
    else:
        icon_arg = b''

    # Texify in the fixed order character, screen name, author, content.
    meta_args = [
        texifier.texify(tag) if tag else b''
        for tag in (chunk.character, chunk.screen_name, chunk.author)
    ]
    head = br'\glowhead{' + icon_arg + b'}{' + b'}{'.join(meta_args) + b'}'
    return head + texifier.texify(chunk.content)
d2a41ff4 136
d2a41ff4 137
1fac41bf
SW
# LaTeX definition of \glowhead for a layout with no chunk header: the
# command accepts its four arguments and expands to nothing, so only the
# content that renderChunk() emits after \glowhead{...} is typeset.
ContentOnlyLayout = br'''
\newcommand{\glowhead}[4]{}
'''
d2a41ff4 141
d2a41ff4 142
1fac41bf
SW
# LaTeX definition of \glowhead that puts the whole header in a framed box
# inside a wrapstuff environment.  The four arguments, in the order
# renderChunk() emits them, are: #1 icon, #2 character, #3 screen name,
# #4 author.  They are stacked in a centered column, and each
# \ifnotempty{#n}{\\*} adds a line break only after a non-empty argument.
BelowIconLayout = br'''
\newcommand{\glowhead}[4]{\wrapstuffclear
\begin{wrapstuff}[l]
\fbox{
\begin{varwidth}{0.5\textwidth}
 \smash{\parbox[t][0pt]{0pt}{
 \setlength{\fboxrule}{0.2pt}
 \setlength{\fboxsep}{0pt}
 \vspace{-3.4pt}
 \fbox{\hspace{107mm}}
 }\\*}
 \vspace{-1em}
\begin{center}
#1\ifnotempty
{#1}{\\*}#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
\end{varwidth}
}
\end{wrapstuff}

\strut

\noindent}'''
f75c1629
SW
168
169
1fac41bf
SW
# LaTeX definition of \glowhead that frames the icon (#1) together with a
# \parbox holding the character (#2), screen name (#3) and author (#4) in
# a centered column.  Each \ifnotempty{#n}{\\*} adds a line break only
# after a non-empty argument.
#
# Why is \textwidth not the width of the text?
# Why is the width of the text .765\textwidth?
BesideIconLayout = br'''
\newcommand{\glowhead}[4]{

\strut

\noindent\fbox{
#1
\parbox[b]{.765\textwidth}{
\begin{center}
#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
}
}\\*
\vspace{-0.75em}\\*
\noindent}'''