]> git.scottworley.com Git - paperdoorknob/blob - glowfic.py
Strip links from meta fields
[paperdoorknob] / glowfic.py
1 # paperdoorknob: Print glowfic
2 #
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
6
7
8 from abc import ABC, abstractmethod
9 from dataclasses import dataclass
10 import itertools
11 from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
12
13 from typing import Iterable
14
15 from bs4 import BeautifulSoup
16 from bs4.element import Tag
17
18 from images import ImageStore
19 from texify import Texifier
20
21
def _removeViewFromURL(url: str) -> str:
    """Return *url* with every ``view`` query parameter removed.

    All other query parameters are kept in their original order, and the
    rest of the URL (scheme, host, path, fragment, ...) is left as parsed.
    The query string is re-encoded with ``urlencode``.
    """
    parts = urlparse(url)
    # keep_blank_values=True: without it parse_qsl silently discards
    # parameters whose value is empty (e.g. "a="), so removing "view"
    # would also lose unrelated parameters.
    kept = [(key, value)
            for key, value in parse_qsl(parts.query, keep_blank_values=True)
            if key != 'view']
    return urlunparse(parts._replace(query=urlencode(kept)))
27
28
def nonFlatURL(url: str) -> str:
    """Return *url* without any ``view`` query parameter."""
    without_view = _removeViewFromURL(url)
    return without_view
31
32
def flatURL(url: str) -> str:
    """Return *url* with its query forced to end in ``view=flat``.

    Any existing ``view`` parameter is dropped, the remaining parameters
    keep their order, and a single ``view=flat`` pair is appended last.
    """
    parts = urlparse(url)
    # Done in one parse/encode pass.  keep_blank_values=True stops
    # parse_qsl from silently discarding empty-valued parameters ("a=").
    pairs = [(key, value)
             for key, value in parse_qsl(parts.query, keep_blank_values=True)
             if key != 'view']
    pairs.append(('view', 'flat'))
    return urlunparse(parts._replace(query=urlencode(pairs)))
37
38
@dataclass(frozen=True)
class Chunk:
    """One post or reply of a thread, as extracted by makeChunk."""

    # Result of ImageStore.get_image for the chunk's icon; None when the
    # chunk has no icon.
    icon: str | None
    # The 'post-character' div, or None when the chunk has none.
    character: Tag | None
    # The 'post-screenname' div, or None when the chunk has none.
    screen_name: Tag | None
    # The 'post-author' div, or None when the chunk has none.
    author: Tag | None
    # The 'post-content' div; always present.
    content: Tag
46
47 # We avoid the name "post" because the Glowfic community uses the term
48 # inconsistently:
49 # * The Glowfic software sometimes uses "post" to refer to a whole thread
50 # (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
51 # but mostly uses "post" to refer to just the first chunk in a thread
52 # (in the HTML and UI). The non-first chunks are "replies".
53 # * Readers and this software don't need to distinguish first-chunks and
54 # non-first-chunks.
55 # * Humans in the community tend to use "posts" to mean chunks.
56
57
def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
    """Return the DOM elements of every chunk in a parsed thread page.

    The result starts with the first ``div.post-post`` found after the
    document's ``<body>``, followed by every ``div.post-reply`` in the
    document.

    Raises:
        AssertionError: if the document has no body, no ``post-post`` div
            is found, or a lookup yields something other than a Tag.
    """
    body = html.body
    assert body

    # The opening chunk of the thread.
    first_chunk = body.find_next("div", class_="post-post")
    assert isinstance(first_chunk, Tag)

    # Every subsequent chunk.
    replies = html.find_all("div", class_="post-reply")
    assert all(isinstance(reply, Tag) for reply in replies)

    return itertools.chain([first_chunk], replies)
72
73
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM element of one post or reply.

    Args:
        chunk_dom: The element holding the chunk's ``post-icon``,
            ``post-character``, ``post-screenname``, ``post-author`` and
            ``post-content`` divs.  Only ``post-content`` is required.
        image_store: Its ``get_image`` is called with the icon's ``src``
            attribute; the result becomes ``Chunk.icon``.

    Returns:
        The Chunk; fields whose div is absent are None.

    Raises:
        AssertionError: if there is no ``post-content`` div, or a lookup
            yields something other than a Tag.

    Note:
        ``chunk_dom`` is modified in place: ``href`` attributes are removed
        from the links inside the three meta divs.
    """

    def getIcon() -> str | None:
        icon_div = chunk_dom.find('div', class_='post-icon')
        if icon_div is None:
            return None
        assert isinstance(icon_div, Tag)
        icon_img = icon_div.find('img')
        if icon_img is None:
            return None
        assert isinstance(icon_img, Tag)
        # An <img> without a src has nothing to fetch; treat it like the
        # other "no icon" cases rather than raising KeyError.
        src = icon_img.attrs.get('src')
        if src is None:
            return None
        return image_store.get_image(src)

    def getByClass(css_class: str) -> Tag | None:
        tag = chunk_dom.find('div', class_=css_class)
        assert tag is None or isinstance(tag, Tag)
        return tag

    def stripHREF(tag: Tag) -> None:
        # Drop link targets but keep the <a> elements and their text.
        for link in tag.find_all("a"):
            link.attrs.pop("href", None)

    def getMeta(css_class: str) -> Tag | None:
        tag = getByClass(css_class)
        if tag is None:
            return None
        stripHREF(tag)
        return tag

    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(getIcon(),
                 getMeta('post-character'),
                 getMeta('post-screenname'),
                 getMeta('post-author'),
                 content)
112
113
114 def renderIcon(icon_path: str | None, image_size: float) -> bytes | None:
115 if icon_path is None:
116 return None
117 return b'\\includegraphics[width=%fmm,height=%fmm,keepaspectratio]{%s}' % (
118 image_size, image_size, icon_path.encode('UTF-8'))
119
120
class Layout(ABC):
    """Interface for turning a Chunk into LaTeX source."""

    @abstractmethod
    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the LaTeX for *chunk* as bytes."""
        raise NotImplementedError
126
127
class ContentOnlyLayout(Layout):
    """Layout that renders only a chunk's content; icon and meta are ignored."""

    def __init__(self, texifier: Texifier) -> None:
        self._texifier = texifier

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the texified content followed by a newline."""
        body = self._texifier.texify(chunk.content)
        return body + b'\n'
135
136
class BelowIconLayout(Layout):
    """Layout with the icon and meta fields stacked in a framed box.

    The box is emitted in a ``wrapstuff`` environment with the ``[l]``
    option, followed by the chunk content.
    """

    def __init__(self, texifier: Texifier, image_size: float) -> None:
        self._texifier = texifier
        self._image_size = image_size

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the LaTeX for *chunk*: framed icon/meta box, then content."""
        # Box entries, top to bottom: icon (if any), then each meta field
        # that is present.
        box_lines: list[bytes] = []
        icon_tex = renderIcon(chunk.icon, self._image_size)
        if icon_tex:
            box_lines.append(icon_tex)
        for field in (chunk.character, chunk.screen_name, chunk.author):
            if field is not None:
                box_lines.append(self._texifier.texify(field))

        box = b'\\\\*'.join(box_lines)
        body = self._texifier.texify(chunk.content)

        return b'''\\wrapstuffclear
\\begin{wrapstuff}[l]
\\fbox{
\\begin{varwidth}{0.5\\textwidth}
\\smash{\\parbox[t][0pt]{0pt}{
\\setlength{\\fboxrule}{0.2pt}
\\setlength{\\fboxsep}{0pt}
\\vspace{-3.4pt}
\\fbox{\\hspace{107mm}}
}\\\\*}
\\vspace{-1em}
\\begin{center}
%s
\\end{center}
\\end{varwidth}
}
\\end{wrapstuff}

\\strut

\\noindent %s
''' % (box, body)
174
175
class BesideIconLayout(Layout):
    """Layout with the icon and the meta fields together in one framed box.

    The icon comes first in the box, followed by a ``\\parbox`` holding the
    centred meta fields; the chunk content follows the box.
    """

    def __init__(self, texifier: Texifier, image_size: float) -> None:
        self._texifier = texifier
        self._image_size = image_size

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the LaTeX for *chunk*: framed icon+meta box, then content."""
        icon_tex = renderIcon(chunk.icon, self._image_size) or b''

        # Meta fields that are present, in display order.
        present = [field
                   for field in (chunk.character,
                                 chunk.screen_name,
                                 chunk.author)
                   if field is not None]
        meta_tex = b'\\\\*'.join(
            self._texifier.texify(field) for field in present)

        body = self._texifier.texify(chunk.content)

        # Why is \textwidth not the width of the text?
        # Why is the width of the text .765\textwidth?
        return b'''\\noindent\\fbox{
%s
\\parbox[b]{.765\\textwidth}{
\\begin{center}
%s
\\end{center}
}
}\\\\*
\\vspace{-0.75em}\\\\*
\\noindent %s

\\strut

''' % (icon_tex, meta_tex, body)