]> git.scottworley.com Git - paperdoorknob/blame_incremental - glowfic.py
Move get_title() to Thread
[paperdoorknob] / glowfic.py
... / ...
CommitLineData
1# paperdoorknob: Print glowfic
2#
3# This program is free software: you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation, version 3.
6
7
8from dataclasses import dataclass
9import itertools
10from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
11
12from typing import Iterable
13
14from bs4 import BeautifulSoup
15from bs4.element import Tag
16
17from images import ImageStore
18from texify import Texifier
19
20
21def _removeViewFromURL(url: str) -> str:
22 u = urlparse(url)
23 old_qs = parse_qsl(u.query)
24 new_qs = [(k, v) for k, v in old_qs if k != 'view']
25 return urlunparse(u._replace(query=urlencode(new_qs)))
26
27
28def nonFlatURL(url: str) -> str:
29 return _removeViewFromURL(url)
30
31
32def flatURL(url: str) -> str:
33 u = urlparse(_removeViewFromURL(url))
34 qs = parse_qsl(u.query) + [('view', 'flat')]
35 return urlunparse(u._replace(query=urlencode(qs)))
36
37
@dataclass(frozen=True)
class Chunk:
    """One unit of a thread: header pieces plus the body content.

    Instances are immutable (``frozen=True``).  Every header field may be
    None when the corresponding element is absent; ``content`` is always
    present.
    """
    # String produced by ImageStore.get_image() for the icon, or None.
    icon: str | None
    # The 'post-character' div, or None.
    character: Tag | None
    # The 'post-screenname' div, or None.
    screen_name: Tag | None
    # The 'post-author' div, or None.
    author: Tag | None
    # The 'post-content' div.
    content: Tag
45
46# We avoid the name "post" because the Glowfic community uses the term
47# inconsistently:
48# * The Glowfic software sometimes uses "post" to refer to a whole thread
49# (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
50# but mostly uses "post" to refer to just the first chunk in a thread
51# (in the HTML and UI). The non-first chunks are "replies".
52# * Readers and this software don't need to distinguish first-chunks and
53# non-first-chunks.
54# * Humans in the community tend to use "posts" to mean chunks.
55
56
class Thread:
    """Read-only view over the parsed HTML of one thread page."""

    def __init__(self, dom: BeautifulSoup) -> None:
        self._dom = dom

    def title(self) -> str | None:
        """Return the stripped text of the ``span#post-title`` element.

        Returns None when no such element is found.
        """
        title_span = self._dom.findChild("span", id="post-title")
        return title_span.text.strip() if isinstance(title_span, Tag) else None

    def chunkDOMs(self) -> Iterable[Tag]:
        """Return the DOM of every chunk, first chunk first.

        The first item is the first ``div.post-post`` found after ``<body>``;
        it is followed by every ``div.post-reply`` in the document.  A
        missing body or first chunk trips an assertion.
        """
        page_body = self._dom.body
        assert page_body
        first_chunk = page_body.find_next("div", class_="post-post")
        assert isinstance(first_chunk, Tag)

        replies = self._dom.find_all("div", class_="post-reply")
        assert all(isinstance(reply, Tag) for reply in replies)

        return itertools.chain([first_chunk], replies)
82
83
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM of a single chunk.

    The icon is resolved through ``image_store.get_image`` using the ``src``
    of the ``<img>`` inside the 'post-icon' div.  The character, screen name
    and author divs are returned with the ``href`` attribute stripped from
    every ``<a>`` they contain; this modifies ``chunk_dom`` in place.  A
    missing 'post-content' div trips an assertion.
    """

    def child_div(css_class: str) -> Tag | None:
        # First descendant <div> carrying the given CSS class, if any.
        found = chunk_dom.findChild('div', class_=css_class)
        assert found is None or isinstance(found, Tag)
        return found

    def resolve_icon() -> str | None:
        holder = child_div('post-icon')
        if holder is None:
            return None
        img = holder.findChild('img')
        if img is None:
            return None
        assert isinstance(img, Tag)
        return image_store.get_image(img.attrs['src'])

    def meta_div(css_class: str) -> Tag | None:
        div = child_div(css_class)
        if div is not None:
            # Drop link targets from the header element's anchors.
            for anchor in div.findChildren("a"):
                anchor.attrs.pop("href", None)
        return div

    # Look the content up first so a malformed chunk fails before the image
    # store is consulted.
    body = chunk_dom.findChild('div', class_='post-content')
    assert isinstance(body, Tag)

    icon = resolve_icon()
    character = meta_div('post-character')
    screen_name = meta_div('post-screenname')
    author = meta_div('post-author')
    return Chunk(icon, character, screen_name, author, body)
122
123
def renderChunk(texifier: Texifier, chunk: Chunk) -> bytes:
    r"""Render *chunk* as LaTeX: a ``\glowhead`` call followed by the content.

    The output has the form ``\glowhead{ICON}{CHARACTER}{SCREEN_NAME}{AUTHOR}``
    immediately followed by the texified content.  ICON is
    ``\glowicon{<icon>}`` when the chunk has an icon; any absent header piece
    yields an empty argument.
    """
    if chunk.icon:
        icon_arg = br'\glowicon{%s}' % chunk.icon.encode('UTF-8')
    else:
        icon_arg = b''

    # Texify in header order (character, screen name, author), then content.
    header_args = [icon_arg]
    for piece in (chunk.character, chunk.screen_name, chunk.author):
        header_args.append(texifier.texify(piece) if piece else b'')
    body = texifier.texify(chunk.content)

    return br'\glowhead{' + b'}{'.join(header_args) + b'}' + body
136
137
# LaTeX preamble fragment: defines \glowhead as a four-argument command that
# expands to nothing, so the header emitted by renderChunk is discarded and
# only the chunk content is typeset.
ContentOnlyLayout = br'''
\newcommand{\glowhead}[4]{}
'''
141
142
# LaTeX preamble fragment: defines \glowhead{#1}{#2}{#3}{#4} (renderChunk
# passes icon, character, screen name, author in that order).  The four
# arguments are stacked and centered inside an \fbox'd varwidth box (at most
# 0.5\textwidth wide) placed in a wrapstuff[l] environment; each \\* line
# break is emitted only via \ifnotempty{#n}, i.e. after a non-empty argument.
# NOTE(review): wrapstuff, varwidth and \ifnotempty are not defined in this
# file; they are presumably supplied by the surrounding LaTeX preamble.
BelowIconLayout = br'''
\newcommand{\glowhead}[4]{\wrapstuffclear
\begin{wrapstuff}[l]
\fbox{
\begin{varwidth}{0.5\textwidth}
 \smash{\parbox[t][0pt]{0pt}{
 \setlength{\fboxrule}{0.2pt}
 \setlength{\fboxsep}{0pt}
 \vspace{-3.4pt}
 \fbox{\hspace{107mm}}
 }\\*}
 \vspace{-1em}
\begin{center}
#1\ifnotempty
{#1}{\\*}#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
\end{varwidth}
}
\end{wrapstuff}

\strut

\noindent}'''
168
169
# LaTeX preamble fragment: defines \glowhead{#1}{#2}{#3}{#4} (renderChunk
# passes icon, character, screen name, author in that order).  #1 is placed
# inside an \fbox next to a bottom-aligned \parbox that holds #2, #3 and #4
# centered, with a \\* line break after each non-empty one of #2 and #3.
# NOTE(review): \ifnotempty is not defined in this file; it is presumably
# supplied by the surrounding LaTeX preamble.
#
# Why is \textwidth not the width of the text?
# Why is the width of the text .765\textwidth?
BesideIconLayout = br'''
\newcommand{\glowhead}[4]{

\strut

\noindent\fbox{
#1
\parbox[b]{.765\textwidth}{
\begin{center}
#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
}
}\\*
\vspace{-0.75em}\\*
\noindent}'''
184\end{center}
185}
186}\\*
187\vspace{-0.75em}\\*
188\noindent}'''