]> git.scottworley.com Git - paperdoorknob/blob - glowfic.py
Escape character names
[paperdoorknob] / glowfic.py
1 # paperdoorknob: Print glowfic
2 #
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
6
7
8 from abc import ABC, abstractmethod
9 from dataclasses import dataclass
10 import itertools
11 from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
12
13 from typing import Iterable
14
15 from bs4 import BeautifulSoup
16 from bs4.element import Tag
17
18 from images import ImageStore
19 from texify import Texifier
20
21
def _removeViewFromURL(url: str) -> str:
    """Return url with every 'view' query parameter removed.

    All other query parameters are kept in their original order, including
    parameters whose value is empty.
    """
    u = urlparse(url)
    # keep_blank_values=True: by default parse_qsl silently drops parameters
    # with empty values, so "?a=&view=flat" would lose "a" as a side effect.
    old_qs = parse_qsl(u.query, keep_blank_values=True)
    new_qs = [(k, v) for k, v in old_qs if k != 'view']
    return urlunparse(u._replace(query=urlencode(new_qs)))
27
28
def nonFlatURL(url: str) -> str:
    """Return url with any 'view' query parameter stripped.

    The result no longer requests the flat view (or any other view).
    """
    without_view = _removeViewFromURL(url)
    return without_view
31
32
def flatURL(url: str) -> str:
    """Return url with its 'view' query parameter set to 'flat'.

    Any existing 'view' parameters are removed first; ('view', 'flat') is
    then appended after the remaining query parameters.
    """
    u = urlparse(_removeViewFromURL(url))
    # keep_blank_values=True so that this re-parse does not drop parameters
    # whose value is empty (parse_qsl discards them by default).
    qs = parse_qsl(u.query, keep_blank_values=True) + [('view', 'flat')]
    return urlunparse(u._replace(query=urlencode(qs)))
37
38
@dataclass(frozen=True)
class Chunk:
    """One piece of a thread: its metadata elements and its content.

    Instances are immutable (frozen dataclass).  Every field except
    content may be None when the corresponding element is absent.
    """
    # Value returned by ImageStore.get_image for the icon's src attribute
    # (used as an image path by renderIcon), or None when there is no icon.
    icon: str | None
    # The <div class="post-character"> element, if any.
    character: Tag | None
    # The <div class="post-screenname"> element, if any.
    screen_name: Tag | None
    # The <div class="post-author"> element, if any.
    author: Tag | None
    # The <div class="post-content"> element; always present.
    content: Tag
46
47 # We avoid the name "post" because the Glowfic community uses the term
48 # inconsistently:
49 # * The Glowfic software sometimes uses "post" to refer to a whole thread
50 # (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
51 # but mostly uses "post" to refer to just the first chunk in a thread
52 # (in the HTML and UI). The non-first chunks are "replies".
53 # * Readers and this software don't need to distinguish first-chunks and
54 # non-first-chunks.
55 # * Humans in the community tend to use "posts" to mean chunks.
56
57
def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
    """Return the DOM elements of all chunks found in html.

    The first item is the first <div class="post-post"> found after the
    start of <body>; it is followed by every <div class="post-reply"> in
    the document, in the order find_all returns them.

    Raises AssertionError if the document has no body or no "post-post" div.
    """
    page_body = html.body
    assert page_body
    first_chunk = page_body.find_next("div", class_="post-post")
    assert isinstance(first_chunk, Tag)

    replies = html.find_all("div", class_="post-reply")
    assert all(isinstance(reply, Tag) for reply in replies)

    return itertools.chain([first_chunk], replies)
72
73
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM element of a single chunk.

    All lookups use find(), which searches only the descendants of
    chunk_dom.  (find_next() would keep searching the rest of the document,
    so a chunk lacking an icon or character would wrongly pick up the one
    belonging to a later chunk.)

    chunk_dom: the element containing one chunk's markup.
    image_store: used to resolve the icon's src via get_image().

    Returns a Chunk whose icon, character, screen_name and author are None
    when the corresponding element is absent from chunk_dom.

    Raises AssertionError if chunk_dom has no <div class="post-content">,
    and KeyError if the icon's <img> has no src attribute.
    """

    def getIcon() -> str | None:
        icon_div = chunk_dom.find('div', class_='post-icon')
        if icon_div is None:
            return None
        assert isinstance(icon_div, Tag)
        # Only an <img> inside this chunk's own icon div counts.
        icon_img = icon_div.find('img')
        if icon_img is None:
            return None
        assert isinstance(icon_img, Tag)
        return image_store.get_image(icon_img.attrs['src'])

    def getByClass(css_class: str) -> Tag | None:
        tag = chunk_dom.find('div', class_=css_class)
        assert tag is None or isinstance(tag, Tag)
        return tag

    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(getIcon(),
                 getByClass('post-character'),
                 getByClass('post-screenname'),
                 getByClass('post-author'),
                 content)
99
100
101 def renderIcon(icon_path: str | None, image_size: float) -> str | None:
102 params = f'width={image_size}mm,height={image_size}mm,keepaspectratio'
103 return f'\\includegraphics[{params}]{{{icon_path}}}' if icon_path else None
104
105
class Layout(ABC):
    """Abstract interface for rendering a Chunk to bytes."""

    @abstractmethod
    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the rendered form of chunk; subclasses must override."""
        raise NotImplementedError
111
112
class ContentOnlyLayout(Layout):
    """Layout that renders only a chunk's content.

    The icon, character, screen name and author are ignored.
    """

    def __init__(self, texifier: Texifier) -> None:
        self._texifier = texifier

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the texified content followed by a newline."""
        rendered_content = self._texifier.texify(chunk.content)
        return rendered_content + b'\n'
120
121
class BelowIconLayout(Layout):
    """Layout with the icon and bylines in a framed, left-side wrapstuff box.

    The box holds the icon (if any) followed by the character, screen name
    and author (those that are present), separated by line breaks; the
    chunk's content follows the box.
    """

    def __init__(self, texifier: Texifier, image_size: float) -> None:
        self._image_size = image_size
        self._texifier = texifier

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the LaTeX for chunk: metadata box first, then content."""
        header_parts: list[bytes] = []

        rendered_icon = renderIcon(chunk.icon, self._image_size)
        if rendered_icon:
            header_parts.append(rendered_icon.encode('UTF-8'))

        # Absent metadata elements are skipped rather than rendered empty.
        for tag in (chunk.character, chunk.screen_name, chunk.author):
            if tag is not None:
                header_parts.append(self._texifier.texify(tag))

        header = b'\\\\*'.join(header_parts)
        content = self._texifier.texify(chunk.content)

        return b'''\\wrapstuffclear
\\begin{wrapstuff}[l]
\\fbox{
\\begin{varwidth}{0.5\\textwidth}
\\smash{\\parbox[t][0pt]{0pt}{
\\setlength{\\fboxrule}{0.2pt}
\\setlength{\\fboxsep}{0pt}
\\vspace{-3.4pt}
\\fbox{\\hspace{107mm}}
}\\\\*}
\\vspace{-1em}
\\begin{center}
%s
\\end{center}
\\end{varwidth}
}
\\end{wrapstuff}

\\strut

\\noindent %s
''' % (header, content)
159
160
class BesideIconLayout(Layout):
    """Layout with the icon and a centered byline block in one framed box.

    The box contains the icon (if any) followed by a parbox holding the
    character, screen name and author (those that are present); the chunk's
    content is emitted after the box.
    """

    def __init__(self, texifier: Texifier, image_size: float) -> None:
        self._image_size = image_size
        self._texifier = texifier

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the LaTeX for chunk: framed header box, then content."""
        rendered_icon = renderIcon(chunk.icon, self._image_size)
        icon_bytes = rendered_icon.encode('UTF-8') if rendered_icon else b''

        # Absent metadata elements are skipped rather than rendered empty.
        bylines = b'\\\\*'.join(
            self._texifier.texify(tag)
            for tag in (chunk.character, chunk.screen_name, chunk.author)
            if tag is not None)

        content = self._texifier.texify(chunk.content)

        # Why is \textwidth not the width of the text?
        # Why is the width of the text .765\textwidth?
        return b'''\\noindent\\fbox{
%s
\\parbox[b]{.765\\textwidth}{
\\begin{center}
%s
\\end{center}
}
}\\\\*
\\vspace{-0.75em}\\\\*
\\noindent %s

\\strut

''' % (icon_bytes, bylines, content)