]> git.scottworley.com Git - paperdoorknob/blame - glowfic.py
Support _ in URLs
[paperdoorknob] / glowfic.py
CommitLineData
e6adf6ce
SW
1# paperdoorknob: Print glowfic
2#
3# This program is free software: you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation, version 3.
6
7
d2a41ff4 8from abc import ABC, abstractmethod
aa060d9b 9from dataclasses import dataclass
e6adf6ce 10import itertools
1452f8d3 11from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
e6adf6ce
SW
12
13from typing import Iterable
14
15from bs4 import BeautifulSoup
16from bs4.element import Tag
17
aa060d9b 18from images import ImageStore
d2a41ff4 19from texify import Texifier
aa060d9b
SW
20
21
1452f8d3
SW
22def _removeViewFromURL(url: str) -> str:
23 u = urlparse(url)
24 old_qs = parse_qsl(u.query)
25 new_qs = [(k, v) for k, v in old_qs if k != 'view']
26 return urlunparse(u._replace(query=urlencode(new_qs)))
27
28
29def nonFlatURL(url: str) -> str:
30 return _removeViewFromURL(url)
31
32
33def flatURL(url: str) -> str:
34 u = urlparse(_removeViewFromURL(url))
35 qs = parse_qsl(u.query) + [('view', 'flat')]
36 return urlunparse(u._replace(query=urlencode(qs)))
37
38
aa060d9b
SW
@dataclass(frozen=True)
class Chunk:
    """The pieces of one chunk (a thread's opening entry or a single reply).

    Instances are immutable.  makeChunk fills the fields from a chunk's
    DOM element; every field except `content` may be None when the
    corresponding element is missing.
    """

    # Value returned by ImageStore.get_image for the chunk's icon <img>;
    # the layouts hand it to renderIcon as the image path.
    icon: str | None
    # The chunk's div.post-character element.
    character: Tag | None
    # The chunk's div.post-screenname element.
    screen_name: Tag | None
    # The chunk's div.post-author element.
    author: Tag | None
    # The chunk's div.post-content element (always present).
    content: Tag
46
e6adf6ce
SW
47# We avoid the name "post" because the Glowfic community uses the term
48# inconsistently:
49# * The Glowfic software sometimes uses "post" to refer to a whole thread
aa060d9b
SW
50# (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
51# but mostly uses "post" to refer to just the first chunk in a thread
52# (in the HTML and UI). The non-first chunks are "replies".
e6adf6ce
SW
53# * Readers and this software don't need to distinguish first-chunks and
54# non-first-chunks.
aa060d9b 55# * Humans in the community tend to use "posts" to mean chunks.
e6adf6ce
SW
56
57
def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
    """Return the DOM element of every chunk on a thread page.

    The first item is the first div.post-post found after <body>; it is
    followed by every div.post-reply in the document, as returned by
    find_all.

    Raises AssertionError when the document has no <body>, when no
    div.post-post is found, or when a find_all result is not a Tag.
    """
    body = html.body
    assert body
    first_chunk = body.find_next("div", class_="post-post")
    assert isinstance(first_chunk, Tag)

    replies = html.find_all("div", class_="post-reply")
    assert all(isinstance(reply, Tag) for reply in replies)

    return itertools.chain([first_chunk], replies)
aa060d9b
SW
72
73
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM element of a single chunk.

    Args:
        chunk_dom: The element enclosing one chunk (see chunkDOMs).
        image_store: Its get_image method is called with the icon <img>'s
            `src` attribute; the return value becomes Chunk.icon.

    Returns:
        A Chunk whose icon, character, screen_name and author are None
        when the corresponding element is absent from `chunk_dom`.

    Raises:
        AssertionError: `chunk_dom` has no div.post-content descendant.
        KeyError: The icon <img> has no `src` attribute.
    """
    # NOTE: lookups use Tag.find rather than findChild.  findChild is a
    # legacy alias of find that newer Beautiful Soup releases deprecate;
    # both search only the descendants of the tag they are called on.

    def getIcon() -> str | None:
        icon_div = chunk_dom.find('div', class_='post-icon')
        if icon_div is None:
            return None
        assert isinstance(icon_div, Tag)
        icon_img = icon_div.find('img')
        if icon_img is None:
            return None
        assert isinstance(icon_img, Tag)
        return image_store.get_image(icon_img.attrs['src'])

    def getByClass(css_class: str) -> Tag | None:
        tag = chunk_dom.find('div', class_=css_class)
        # find may in principle return a non-Tag element; narrow for mypy.
        assert tag is None or isinstance(tag, Tag)
        return tag

    # The content is mandatory, so check for it before anything else.
    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(getIcon(),
                 getByClass('post-character'),
                 getByClass('post-screenname'),
                 getByClass('post-author'),
                 content)
d2a41ff4
SW
100
101
39db9a10
SW
102def renderIcon(icon_path: str | None, image_size: float) -> bytes | None:
103 if icon_path is None:
104 return None
105 return b'\\includegraphics[width=%fmm,height=%fmm,keepaspectratio]{%s}' % (
106 image_size, image_size, icon_path.encode('UTF-8'))
d2a41ff4
SW
107
108
class Layout(ABC):
    """Interface for rendering a single Chunk as LaTeX source."""

    @abstractmethod
    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the LaTeX bytes representing `chunk`."""
        raise NotImplementedError
114
115
class ContentOnlyLayout(Layout):
    """Layout that emits only a chunk's content; icon and byline are ignored."""

    def __init__(self, texifier: Texifier) -> None:
        """Remember the Texifier used to convert chunk content."""
        self._texifier = texifier

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the texified content of `chunk` followed by a newline."""
        body = self._texifier.texify(chunk.content)
        return body + b'\n'
d2a41ff4
SW
123
124
class BelowIconLayout(Layout):
    """Layout that puts icon and byline in a framed box the content wraps around.

    The box is a left-aligned `wrapstuff` environment holding the icon
    followed by the character, screen name and author, each on its own
    line; the chunk content follows after the environment.
    """

    # First %s: the box contents (icon and byline lines).
    # Second %s: the texified chunk content.
    _TEMPLATE = b'''\\wrapstuffclear
\\begin{wrapstuff}[l]
\\fbox{
\\begin{varwidth}{0.5\\textwidth}
 \\smash{\\parbox[t][0pt]{0pt}{
 \\setlength{\\fboxrule}{0.2pt}
 \\setlength{\\fboxsep}{0pt}
 \\vspace{-3.4pt}
 \\fbox{\\hspace{107mm}}
 }\\\\*}
 \\vspace{-1em}
\\begin{center}
%s
\\end{center}
\\end{varwidth}
}
\\end{wrapstuff}

\\strut

\\noindent %s
'''

    def __init__(self, texifier: Texifier, image_size: float) -> None:
        """Store the Texifier and the icon size (mm, as used by renderIcon)."""
        self._texifier = texifier
        self._image_size = image_size

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the LaTeX for `chunk`, omitting any absent icon/byline part."""
        box_lines: list[bytes] = []

        icon = renderIcon(chunk.icon, self._image_size)
        if icon:
            box_lines.append(icon)

        for part in (chunk.character, chunk.screen_name, chunk.author):
            if part is not None:
                box_lines.append(self._texifier.texify(part))

        # "\\*" is a LaTeX line break that forbids a page break after it.
        return self._TEMPLATE % (
            b'\\\\*'.join(box_lines),
            self._texifier.texify(chunk.content))
f75c1629
SW
162
163
class BesideIconLayout(Layout):
    """Layout that frames the icon with the byline beside it.

    The icon and a centered parbox holding character, screen name and
    author share one \\fbox; the chunk content follows below the box.
    """

    # Open questions carried over from the original author:
    # Why is \textwidth not the width of the text?
    # Why is the width of the text .765\textwidth?
    #
    # First %s: the icon (may be empty).  Second %s: the byline lines.
    # Third %s: the texified chunk content.
    _TEMPLATE = b'''\\noindent\\fbox{
%s
\\parbox[b]{.765\\textwidth}{
\\begin{center}
%s
\\end{center}
}
}\\\\*
\\vspace{-0.75em}\\\\*
\\noindent %s

\\strut

'''

    def __init__(self, texifier: Texifier, image_size: float) -> None:
        """Store the Texifier and the icon size (mm, as used by renderIcon)."""
        self._texifier = texifier
        self._image_size = image_size

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the LaTeX for `chunk`; a missing icon renders as nothing."""
        icon = renderIcon(chunk.icon, self._image_size)

        byline = [
            self._texifier.texify(part)
            for part in (chunk.character, chunk.screen_name, chunk.author)
            if part is not None
        ]

        # "\\*" is a LaTeX line break that forbids a page break after it.
        return self._TEMPLATE % (
            icon or b'',
            b'\\\\*'.join(byline),
            self._texifier.texify(chunk.content))