]> git.scottworley.com Git - paperdoorknob/blob - glowfic.py
Support _ in URLs
[paperdoorknob] / glowfic.py
1 # paperdoorknob: Print glowfic
2 #
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
6
7
8 from abc import ABC, abstractmethod
9 from dataclasses import dataclass
10 import itertools
11 from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
12
13 from typing import Iterable
14
15 from bs4 import BeautifulSoup
16 from bs4.element import Tag
17
18 from images import ImageStore
19 from texify import Texifier
20
21
def _removeViewFromURL(url: str) -> str:
    """Return *url* with every ``view`` query parameter removed.

    All other URL components and all other query parameters are kept, in
    their original order.  The query string is re-encoded with
    ``urlencode``.
    """
    u = urlparse(url)
    # keep_blank_values=True: by default parse_qsl silently discards
    # parameters whose value is empty (e.g. "?draft="), which would strip
    # more from the URL than just the view selector.
    old_qs = parse_qsl(u.query, keep_blank_values=True)
    new_qs = [(k, v) for k, v in old_qs if k != 'view']
    return urlunparse(u._replace(query=urlencode(new_qs)))


def nonFlatURL(url: str) -> str:
    """Return *url* without any ``view`` query parameter."""
    return _removeViewFromURL(url)


def flatURL(url: str) -> str:
    """Return *url* with its ``view`` query parameter set to ``flat``.

    Any existing ``view`` parameter is dropped first, so the result has
    exactly one ``view=flat``, appended after the other parameters.
    """
    u = urlparse(_removeViewFromURL(url))
    # Same reasoning as in _removeViewFromURL: do not lose empty-valued
    # parameters while re-parsing the query.
    qs = parse_qsl(u.query, keep_blank_values=True) + [('view', 'flat')]
    return urlunparse(u._replace(query=urlencode(qs)))
37
38
@dataclass(frozen=True)
class Chunk:
    """The extracted pieces of one entry in a thread.

    Immutable.  Every field except ``content`` may be ``None`` when the
    corresponding element is absent from the chunk's HTML.
    """

    # Value returned by the image store for the icon's <img src>, if any.
    icon: str | None
    # The div.post-character element, if any.
    character: Tag | None
    # The div.post-screenname element, if any.
    screen_name: Tag | None
    # The div.post-author element, if any.
    author: Tag | None
    # The div.post-content element; always present.
    content: Tag

    # Why "chunk" and not "post": the Glowfic community uses "post"
    # inconsistently:
    #  * The Glowfic software sometimes uses "post" to refer to a whole
    #    thread (in the URL), sometimes uses "post" to refer to chunks (in
    #    the CSS), but mostly uses "post" to refer to just the first chunk
    #    in a thread (in the HTML and UI).  The non-first chunks are
    #    "replies".
    #  * Readers and this software don't need to distinguish first-chunks
    #    and non-first-chunks.
    #  * Humans in the community tend to use "posts" to mean chunks.
56
57
def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
    """Return the DOM elements of all chunks in a thread page.

    The result starts with the first ``div.post-post`` found after the
    document's ``<body>``, followed by every ``div.post-reply`` in the
    document.  Assertions fail if the page has no body or no
    ``div.post-post``.
    """
    page_body = html.body
    assert page_body
    first_chunk = page_body.find_next("div", class_="post-post")
    assert isinstance(first_chunk, Tag)

    reply_chunks = html.find_all("div", class_="post-reply")
    assert all(isinstance(reply, Tag) for reply in reply_chunks)

    return itertools.chain([first_chunk], reply_chunks)
72
73
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM element of a single chunk.

    chunk_dom: the chunk's element (as produced by chunkDOMs).
    image_store: used to resolve the icon's ``src`` via ``get_image``.

    An assertion fails if the chunk has no ``div.post-content``; every
    other part is optional and becomes ``None`` when absent.
    """
    # Note: uses Tag.find rather than the deprecated BS3-era alias
    # findChild; the two are the same method.

    def getIcon() -> str | None:
        """Return the stored icon image, or None if the chunk has no icon."""
        icon_div = chunk_dom.find('div', class_='post-icon')
        if icon_div is None:
            return None
        assert isinstance(icon_div, Tag)
        icon_img = icon_div.find('img')
        if icon_img is None:
            return None
        assert isinstance(icon_img, Tag)
        return image_store.get_image(icon_img.attrs['src'])

    def getByClass(css_class: str) -> Tag | None:
        """Return the first descendant div with the given class, if any."""
        tag = chunk_dom.find('div', class_=css_class)
        assert tag is None or isinstance(tag, Tag)
        return tag

    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(getIcon(),
                 getByClass('post-character'),
                 getByClass('post-screenname'),
                 getByClass('post-author'),
                 content)
100
101
102 def renderIcon(icon_path: str | None, image_size: float) -> bytes | None:
103 if icon_path is None:
104 return None
105 return b'\\includegraphics[width=%fmm,height=%fmm,keepaspectratio]{%s}' % (
106 image_size, image_size, icon_path.encode('UTF-8'))
107
108
class Layout(ABC):
    """Abstract interface for turning a Chunk into LaTeX output."""

    @abstractmethod
    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the rendering of *chunk* as bytes.

        Concrete layouts must override this method.
        """
        raise NotImplementedError
114
115
class ContentOnlyLayout(Layout):
    """Layout that emits only each chunk's content, with no icon or names."""

    def __init__(self, texifier: Texifier) -> None:
        self._texifier = texifier

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the texified content of *chunk* followed by a newline."""
        rendered_content = self._texifier.texify(chunk.content)
        return rendered_content + b'\n'
123
124
class BelowIconLayout(Layout):
    """Layout that puts icon and names in a framed box wrapped by the text.

    The icon (if any) and the character, screen name and author (each if
    present) are stacked in a left-aligned ``wrapstuff`` box; the chunk's
    content follows.
    """

    def __init__(self, texifier: Texifier, image_size: float) -> None:
        self._image_size = image_size
        self._texifier = texifier

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the LaTeX for *chunk* in the below-icon arrangement."""
        header_lines: list[bytes] = []

        rendered_icon = renderIcon(chunk.icon, self._image_size)
        if rendered_icon:
            header_lines.append(rendered_icon)

        # Only the metadata elements that exist contribute a line.
        for tag in (chunk.character, chunk.screen_name, chunk.author):
            if tag is not None:
                header_lines.append(self._texifier.texify(tag))

        # Lines in the box are separated by a no-page-break line break.
        header = b'\\\\*'.join(header_lines)
        body = self._texifier.texify(chunk.content)

        return b'''\\wrapstuffclear
\\begin{wrapstuff}[l]
\\fbox{
\\begin{varwidth}{0.5\\textwidth}
\\smash{\\parbox[t][0pt]{0pt}{
\\setlength{\\fboxrule}{0.2pt}
\\setlength{\\fboxsep}{0pt}
\\vspace{-3.4pt}
\\fbox{\\hspace{107mm}}
}\\\\*}
\\vspace{-1em}
\\begin{center}
%s
\\end{center}
\\end{varwidth}
}
\\end{wrapstuff}

\\strut

\\noindent %s
''' % (header, body)
162
163
class BesideIconLayout(Layout):
    """Layout that puts the names beside the icon in one framed box.

    The icon (if any) sits next to a centered block holding the character,
    screen name and author (each if present); the chunk's content follows
    below the box.
    """

    def __init__(self, texifier: Texifier, image_size: float) -> None:
        self._image_size = image_size
        self._texifier = texifier

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the LaTeX for *chunk* in the beside-icon arrangement."""
        rendered_icon = renderIcon(chunk.icon, self._image_size)
        icon_part = rendered_icon if rendered_icon else b''

        # Only the metadata elements that exist contribute a line; lines
        # are separated by a no-page-break line break.
        present = [tag
                   for tag in (chunk.character, chunk.screen_name, chunk.author)
                   if tag is not None]
        names = b'\\\\*'.join(self._texifier.texify(tag) for tag in present)

        body = self._texifier.texify(chunk.content)

        # Why is \textwidth not the width of the text?
        # Why is the width of the text .765\textwidth?
        return b'''\\noindent\\fbox{
%s
\\parbox[b]{.765\\textwidth}{
\\begin{center}
%s
\\end{center}
}
}\\\\*
\\vspace{-0.75em}\\\\*
\\noindent %s

\\strut

''' % (icon_part, names, body)