]> git.scottworley.com Git - paperdoorknob/blob - glowfic.py
Move get_title() to Thread
[paperdoorknob] / glowfic.py
1 # paperdoorknob: Print glowfic
2 #
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
6
7
8 from dataclasses import dataclass
9 import itertools
10 from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
11
12 from typing import Iterable
13
14 from bs4 import BeautifulSoup
15 from bs4.element import Tag
16
17 from images import ImageStore
18 from texify import Texifier
19
20
def _removeViewFromURL(url: str) -> str:
    """Return *url* with every ``view`` query parameter removed.

    The query string is decoded with ``parse_qsl`` and re-encoded with
    ``urlencode``, so the surviving parameters come back in normalized
    form.  ``parse_qsl`` is called with its defaults, which means
    parameters whose value is blank are dropped as well.  All other URL
    components are passed through unchanged.
    """
    parts = urlparse(url)
    kept = [pair for pair in parse_qsl(parts.query) if pair[0] != 'view']
    # ParseResult.geturl() is urlunparse() applied to the result itself.
    return parts._replace(query=urlencode(kept)).geturl()
26
27
def nonFlatURL(url: str) -> str:
    """Return *url* without any ``view`` query parameter.

    This is the counterpart of ``flatURL``: instead of forcing
    ``view=flat`` it leaves the view unspecified.
    """
    without_view = _removeViewFromURL(url)
    return without_view
30
31
def flatURL(url: str) -> str:
    """Return *url* with its query forced to end in ``view=flat``.

    Any ``view`` parameter already present is removed first, so the
    result carries exactly one ``view`` entry, placed after the other
    parameters.
    """
    stripped = urlparse(_removeViewFromURL(url))
    params = [*parse_qsl(stripped.query), ('view', 'flat')]
    flat_query = urlencode(params)
    return urlunparse(stripped._replace(query=flat_query))
36
37
@dataclass(frozen=True)
class Chunk:
    """One piece of a thread: the opening post or a single reply.

    Instances are immutable.  ``makeChunk`` builds them from a chunk's
    DOM; every field except ``content`` is None when the corresponding
    element is missing from that DOM.
    """

    # Value returned by the image store for the icon's <img src>.
    icon: str | None
    # The 'post-character' div.
    character: Tag | None
    # The 'post-screenname' div.
    screen_name: Tag | None
    # The 'post-author' div.
    author: Tag | None
    # The 'post-content' div; always present.
    content: Tag

    # We avoid the name "post" because the Glowfic community uses the term
    # inconsistently:
    #  * The Glowfic software sometimes uses "post" to refer to a whole thread
    #    (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
    #    but mostly uses "post" to refer to just the first chunk in a thread
    #    (in the HTML and UI).  The non-first chunks are "replies".
    #  * Readers and this software don't need to distinguish first-chunks and
    #    non-first-chunks.
    #  * Humans in the community tend to use "posts" to mean chunks.
55
56
class Thread:
    """Read-only view over the parsed HTML of one thread page."""

    def __init__(self, dom: BeautifulSoup) -> None:
        """Wrap *dom*; the document is stored, not copied."""
        self._dom = dom

    def title(self) -> str | None:
        """Return the stripped text of ``<span id="post-title">``.

        Returns:
            The title, or None when the document has no such span.
        """
        # find() replaces findChild(), a deprecated alias for the same search.
        span = self._dom.find("span", id="post-title")
        if isinstance(span, Tag):
            return span.text.strip()
        return None

    def chunkDOMs(self) -> Iterable[Tag]:
        """Return the DOM element of every chunk in the thread.

        The first element is the first ``div.post-post`` found after
        ``<body>``; it is followed by every ``div.post-reply`` in the
        document.  Both lookups run before this method returns.

        Raises:
            AssertionError: If the document has no ``<body>``, no
                ``div.post-post``, or a reply match that is not a Tag.
        """
        body = self._dom.body
        assert body
        first = body.find_next("div", class_="post-post")
        assert isinstance(first, Tag)

        replies = self._dom.find_all("div", class_="post-reply")
        assert all(isinstance(reply, Tag) for reply in replies)

        return itertools.chain([first], replies)
82
83
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM element of a single chunk.

    Args:
        chunk_dom: Element containing the chunk's ``post-icon``,
            ``post-character``, ``post-screenname``, ``post-author`` and
            ``post-content`` divs.  Only ``post-content`` is required.
        image_store: Its ``get_image`` is called with the icon's ``src``
            attribute; the return value becomes ``Chunk.icon``.

    Returns:
        The assembled Chunk.  ``icon`` is None when there is no icon div
        or no ``<img>`` inside it; each metadata field is None when its
        div is absent.

    Raises:
        AssertionError: If ``chunk_dom`` has no ``div.post-content``.
        KeyError: If the icon ``<img>`` has no ``src`` attribute.

    Note:
        The ``href`` attribute is deleted, in place, from every ``<a>``
        inside the character, screen-name and author divs, so
        ``chunk_dom`` is modified by this call.
    """

    def icon() -> str | None:
        # find()/find_all() replace the deprecated findChild()/findChildren()
        # aliases throughout this function; the searches are the same.
        icon_div = chunk_dom.find('div', class_='post-icon')
        if icon_div is None:
            return None
        assert isinstance(icon_div, Tag)
        icon_img = icon_div.find('img')
        if icon_img is None:
            return None
        assert isinstance(icon_img, Tag)
        return image_store.get_image(icon_img.attrs['src'])

    def meta(css_class: str) -> Tag | None:
        tag = chunk_dom.find('div', class_=css_class)
        assert tag is None or isinstance(tag, Tag)
        if tag is None:
            return None
        # Drop link targets but keep the link text and markup.
        for anchor in tag.find_all("a"):
            anchor.attrs.pop("href", None)
        return tag

    # The content is looked up first, so a chunk without content fails
    # before the image store is consulted.
    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(icon(),
                 meta('post-character'),
                 meta('post-screenname'),
                 meta('post-author'),
                 content)
122
123
def renderChunk(texifier: Texifier, chunk: Chunk) -> bytes:
    r"""Render *chunk* as LaTeX source.

    The output is ``\glowhead{<icon>}{<character>}{<screen name>}{<author>}``
    followed directly by the texified content.  The icon argument is
    ``\glowicon{<icon>}`` with the icon string UTF-8 encoded.  Any header
    piece the chunk lacks is rendered as an empty argument.

    Args:
        texifier: Its ``texify`` turns a Tag into LaTeX bytes.
        chunk: The chunk to render.

    Returns:
        The LaTeX for the chunk, as bytes.
    """

    def tex_or_empty(tag: Tag | None) -> bytes:
        return texifier.texify(tag) if tag else b''

    if chunk.icon:
        icon_tex = br'\glowicon{%s}' % chunk.icon.encode('UTF-8')
    else:
        icon_tex = b''

    # Header pieces are texified in argument order, then the content.
    pieces = (
        br'\glowhead{',
        icon_tex,
        b'}{',
        tex_or_empty(chunk.character),
        b'}{',
        tex_or_empty(chunk.screen_name),
        b'}{',
        tex_or_empty(chunk.author),
        b'}',
        texifier.texify(chunk.content),
    )
    return b''.join(pieces)
136
137
# LaTeX definition of \glowhead that takes the four header arguments
# emitted by renderChunk (icon, character, screen name, author) and
# expands to nothing, leaving only each chunk's content.
ContentOnlyLayout = br'''
\newcommand{\glowhead}[4]{}
'''
141
142
# LaTeX definition of \glowhead that stacks the four header arguments
# emitted by renderChunk (#1 icon, #2 character, #3 screen name,
# #4 author) in a centred, framed box inside a left-aligned wrapstuff
# environment.  Each of #1-#3 is followed by \ifnotempty{#n}{\\*}.
# \wrapstuffclear, wrapstuff, varwidth and \ifnotempty are not defined
# in this file and must be provided by the surrounding LaTeX document.
BelowIconLayout = br'''
\newcommand{\glowhead}[4]{\wrapstuffclear
\begin{wrapstuff}[l]
\fbox{
\begin{varwidth}{0.5\textwidth}
\smash{\parbox[t][0pt]{0pt}{
\setlength{\fboxrule}{0.2pt}
\setlength{\fboxsep}{0pt}
\vspace{-3.4pt}
\fbox{\hspace{107mm}}
}\\*}
\vspace{-1em}
\begin{center}
#1\ifnotempty
{#1}{\\*}#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
\end{varwidth}
}
\end{wrapstuff}

\strut

\noindent}'''
168
169
# LaTeX definition of \glowhead that puts the icon (#1) in a framed box
# next to a \parbox holding the centred character (#2), screen name (#3)
# and author (#4), as emitted by renderChunk.  \ifnotempty is not
# defined in this file and must be provided by the surrounding document.
#
# Open questions about the \parbox width:
# Why is \textwidth not the width of the text?
# Why is the width of the text .765\textwidth?
BesideIconLayout = br'''
\newcommand{\glowhead}[4]{

\strut

\noindent\fbox{
#1
\parbox[b]{.765\textwidth}{
\begin{center}
#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
}
}\\*
\vspace{-0.75em}\\*
\noindent}'''