]> git.scottworley.com Git - paperdoorknob/blob - glowfic.py
573050872d3d3d3ef6e5647c02dcb4f238e65e21
[paperdoorknob] / glowfic.py
1 # paperdoorknob: Print glowfic
2 #
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
6
7
8 from abc import ABC, abstractmethod
9 from dataclasses import dataclass
10 import itertools
11 from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
12
13 from typing import Iterable
14
15 from bs4 import BeautifulSoup
16 from bs4.element import Tag
17
18 from images import ImageStore
19 from texify import Texifier
20
21
def _removeViewFromURL(url: str) -> str:
    """Return *url* with every ``view`` query parameter removed.

    All other query parameters are kept, in order.  Parameters with an
    empty value (``?page=``) are kept too: ``parse_qsl`` drops them unless
    ``keep_blank_values=True`` is passed, which would make this function
    silently delete parameters it was never asked to touch.
    """
    u = urlparse(url)
    old_qs = parse_qsl(u.query, keep_blank_values=True)
    new_qs = [(k, v) for k, v in old_qs if k != 'view']
    return urlunparse(u._replace(query=urlencode(new_qs)))


def nonFlatURL(url: str) -> str:
    """Return *url* without any ``view`` query parameter."""
    return _removeViewFromURL(url)


def flatURL(url: str) -> str:
    """Return *url* with its query forced to end in ``view=flat``.

    Any existing ``view`` parameter is removed first, so the result
    carries exactly one ``view`` parameter regardless of the input.
    """
    u = urlparse(_removeViewFromURL(url))
    # Keep blank-valued parameters here as well; the helper preserved them
    # and re-parsing with the default would drop them again.
    qs = parse_qsl(u.query, keep_blank_values=True) + [('view', 'flat')]
    return urlunparse(u._replace(query=urlencode(qs)))
37
38
@dataclass(frozen=True)
class Chunk:
    """One immutable unit of a thread: optional metadata plus its content.

    Each metadata field is None when the corresponding element was not
    found for the chunk.
    """

    # Value obtained from the image store for the icon's ``src``; it is
    # later used as the argument of \includegraphics.
    icon: str | None
    # Stripped text of the 'post-character' div.
    character: str | None
    # Stripped text of the 'post-screenname' div.
    screen_name: str | None
    # Stripped text of the 'post-author' div.
    author: str | None
    # The 'post-content' div itself, still as a DOM element.
    content: Tag

    # We avoid the name "post" because the Glowfic community uses the term
    # inconsistently:
    #   * The Glowfic software sometimes uses "post" to refer to a whole
    #     thread (in the URL), sometimes uses "post" to refer to chunks (in
    #     the CSS), but mostly uses "post" to refer to just the first chunk
    #     in a thread (in the HTML and UI).  The non-first chunks are
    #     "replies".
    #   * Readers and this software don't need to distinguish first-chunks
    #     and non-first-chunks.
    #   * Humans in the community tend to use "posts" to mean chunks.
56
57
def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
    """Yield the DOM element of every chunk in a parsed thread page.

    The first element is the 'post-post' div found after ``<body>``; it is
    followed by every 'post-reply' div in the document.

    Raises AssertionError if the document has no body, if no 'post-post'
    div is found, or if any match is not a Tag.
    """
    body = html.body
    assert body
    first_chunk = body.find_next("div", class_="post-post")
    assert isinstance(first_chunk, Tag)

    replies = html.find_all("div", class_="post-reply")
    assert all(isinstance(reply, Tag) for reply in replies)

    return itertools.chain([first_chunk], replies)
72
73
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM element of a single chunk.

    Every lookup uses ``find`` (descendants of the element only) instead
    of ``find_next``.  ``find_next`` keeps searching through the rest of
    the document, so a chunk without, say, an icon or a character would
    silently pick up the icon or character of a *later* chunk.

    NOTE(review): this assumes the 'post-icon', 'post-character',
    'post-screenname', 'post-author' and 'post-content' divs are nested
    inside ``chunk_dom`` -- confirm against the page markup.

    Raises AssertionError if the chunk has no 'post-content' div.
    """

    def getIcon() -> str | None:
        # Both steps are scoped: the div must be inside this chunk and the
        # image must be inside that div, otherwise there is no icon.
        icon_div = chunk_dom.find('div', class_='post-icon')
        if not isinstance(icon_div, Tag):
            return None
        icon_img = icon_div.find('img')
        if not isinstance(icon_img, Tag):
            return None
        return image_store.get_image(icon_img.attrs['src'])

    def getTextByClass(css_class: str) -> str | None:
        div = chunk_dom.find('div', class_=css_class)
        if div is None:
            return None
        return div.text.strip()

    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(getIcon(),
                 getTextByClass('post-character'),
                 getTextByClass('post-screenname'),
                 getTextByClass('post-author'),
                 content)
100
101
102 def renderIcon(icon_path: str | None, image_size: float) -> str | None:
103 params = f'width={image_size}mm,height={image_size}mm,keepaspectratio'
104 return f'\\includegraphics[{params}]{{{icon_path}}}' if icon_path else None
105
106
class Layout(ABC):
    """Abstract interface for rendering a Chunk to bytes."""

    @abstractmethod
    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the rendered form of *chunk*; subclasses must override."""
        raise NotImplementedError
112
113
class ContentOnlyLayout(Layout):
    """Layout that emits only a chunk's content, ignoring its metadata."""

    def __init__(self, texifier: Texifier) -> None:
        self._texifier = texifier

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the texified content of *chunk* followed by a newline."""
        rendered = self._texifier.texify(chunk.content)
        return rendered + b'\n'
121
122
class BelowIconLayout(Layout):
    """Layout with a framed, left-wrapped box holding icon and metadata.

    The box stacks the icon, character, screen name and author (whichever
    are present), and the chunk's content follows it.
    """

    def __init__(self, texifier: Texifier, image_size: float) -> None:
        self._texifier = texifier
        self._image_size = image_size

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the LaTeX for *chunk*: metadata box, then content."""
        candidates = (
            renderIcon(chunk.icon, self._image_size),
            chunk.character,
            chunk.screen_name,
            chunk.author,
        )
        # Absent entries are skipped; the rest are stacked one per line.
        # NOTE(review): the metadata strings are inserted verbatim --
        # presumably LaTeX-special characters in names are not expected;
        # confirm.
        box_lines = b'\\\\*'.join(
            entry.encode('UTF-8') for entry in candidates
            if entry is not None)
        body = self._texifier.texify(chunk.content)

        return b'''\\wrapstuffclear
\\begin{wrapstuff}[l]
\\fbox{
\\begin{varwidth}{0.5\\textwidth}
\\smash{\\parbox[t][0pt]{0pt}{
\\setlength{\\fboxrule}{0.2pt}
\\setlength{\\fboxsep}{0pt}
\\vspace{-3.4pt}
\\fbox{\\hspace{107mm}}
}\\\\*}
\\vspace{-1em}
\\begin{center}
%s
\\end{center}
\\end{varwidth}
}
\\end{wrapstuff}

\\strut

\\noindent %s
''' % (box_lines, body)
161
162
class BesideIconLayout(Layout):
    """Layout with a framed header: icon beside the centred metadata.

    The header holds the icon (if any) next to the character, screen name
    and author (whichever are present); the chunk's content follows it.
    """

    def __init__(self, texifier: Texifier, image_size: float) -> None:
        self._texifier = texifier
        self._image_size = image_size

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the LaTeX for *chunk*: header box, then content."""
        icon_tex = renderIcon(chunk.icon, self._image_size)
        icon_bytes = icon_tex.encode('UTF-8') if icon_tex else b''

        names = (chunk.character, chunk.screen_name, chunk.author)
        # Absent entries are skipped; the rest are stacked one per line.
        # NOTE(review): the metadata strings are inserted verbatim --
        # presumably LaTeX-special characters in names are not expected;
        # confirm.
        name_lines = b'\\\\*'.join(
            name.encode('UTF-8') for name in names if name is not None)
        body = self._texifier.texify(chunk.content)

        # Why is \textwidth not the width of the text?
        # Why is the width of the text .765\textwidth?
        return b'''\\noindent\\fbox{
%s
\\parbox[b]{.765\\textwidth}{
\\begin{center}
%s
\\end{center}
}
}\\\\*
\\vspace{-0.75em}\\\\*
\\noindent %s

\\strut

''' % (icon_bytes, name_lines, body)