]> git.scottworley.com Git - paperdoorknob/blob - glowfic.py
Learning TeX: Do Layouts with TeX macros
[paperdoorknob] / glowfic.py
1 # paperdoorknob: Print glowfic
2 #
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
6
7
8 from dataclasses import dataclass
9 import itertools
10 from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
11
12 from typing import Iterable
13
14 from bs4 import BeautifulSoup
15 from bs4.element import Tag
16
17 from images import ImageStore
18 from texify import Texifier
19
20
def _removeViewFromURL(url: str) -> str:
    """Return *url* with every ``view`` query parameter removed.

    All other query parameters are kept in their original order, including
    parameters whose value is empty (e.g. ``?page=``).  The query string is
    rebuilt with ``urlencode``, so its escaping may come back normalised.
    """
    parts = urlparse(url)
    # keep_blank_values=True: by default parse_qsl silently drops parameters
    # with empty values, which would remove more than just 'view'.
    kept = [(key, value)
            for key, value in parse_qsl(parts.query, keep_blank_values=True)
            if key != 'view']
    return urlunparse(parts._replace(query=urlencode(kept)))
26
27
def nonFlatURL(url: str) -> str:
    """Return the non-flat form of *url*: the same URL without ``view``."""
    non_flat = _removeViewFromURL(url)
    return non_flat
30
31
def flatURL(url: str) -> str:
    """Return *url* with its query forced to end in ``view=flat``.

    Any existing ``view`` parameter is removed first, so the result carries
    exactly one ``view`` parameter, placed after all the others.
    """
    parts = urlparse(_removeViewFromURL(url))
    # keep_blank_values=True so that re-parsing the cleaned query does not
    # discard empty-valued parameters (parse_qsl drops them by default).
    params = parse_qsl(parts.query, keep_blank_values=True)
    params.append(('view', 'flat'))
    return urlunparse(parts._replace(query=urlencode(params)))
36
37
@dataclass(frozen=True)
class Chunk:
    """One chunk of a thread: optional header fields plus its content.

    Instances are immutable (frozen dataclass).  Every header field may be
    None when the corresponding element is absent from the chunk's HTML.
    """
    # Value returned by the image store for the chunk's icon, if any.
    icon: str | None
    # The chunk's 'post-character' div, if any.
    character: Tag | None
    # The chunk's 'post-screenname' div, if any.
    screen_name: Tag | None
    # The chunk's 'post-author' div, if any.
    author: Tag | None
    # The chunk's 'post-content' div; always present.
    content: Tag
45
46 # We avoid the name "post" because the Glowfic community uses the term
47 # inconsistently:
48 # * The Glowfic software sometimes uses "post" to refer to a whole thread
49 # (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
50 # but mostly uses "post" to refer to just the first chunk in a thread
51 # (in the HTML and UI). The non-first chunks are "replies".
52 # * Readers and this software don't need to distinguish first-chunks and
53 # non-first-chunks.
54 # * Humans in the community tend to use "posts" to mean chunks.
55
56
def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
    """Return the DOM of every chunk on a thread page, first chunk first.

    The first chunk is the first ``div.post-post`` found after ``<body>``;
    it is followed by every ``div.post-reply`` in document order.

    Raises AssertionError when the page has no body or no ``post-post`` div.
    """
    page_body = html.body
    assert page_body
    first_chunk = page_body.find_next("div", class_="post-post")
    assert isinstance(first_chunk, Tag)

    replies = html.find_all("div", class_="post-reply")
    assert all(isinstance(reply, Tag) for reply in replies)

    return itertools.chain([first_chunk], replies)
71
72
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM of a single post or reply.

    Args:
        chunk_dom: The element wrapping one chunk (see chunkDOMs).
        image_store: Used to resolve the icon's ``src`` via ``get_image``.

    Returns:
        A Chunk whose header fields are None wherever the corresponding
        element is missing from ``chunk_dom``.

    Raises:
        AssertionError: If ``chunk_dom`` has no ``div.post-content``.
        KeyError: If the icon ``<img>`` has no ``src`` attribute.

    Side effect: ``href`` attributes are deleted, in place, from every link
    inside the character, screen-name and author divs of ``chunk_dom``.
    """

    def getIcon() -> str | None:
        # find()/find_all() are used throughout instead of the legacy
        # findChild()/findChildren() aliases; they are the same methods.
        icon_div = chunk_dom.find('div', class_='post-icon')
        if icon_div is None:
            return None
        assert isinstance(icon_div, Tag)
        icon_img = icon_div.find('img')
        if icon_img is None:
            return None
        assert isinstance(icon_img, Tag)
        return image_store.get_image(icon_img.attrs['src'])

    def getMeta(css_class: str) -> Tag | None:
        tag = chunk_dom.find('div', class_=css_class)
        if tag is None:
            return None
        assert isinstance(tag, Tag)
        # Drop link targets from the header metadata, keeping the link text.
        for link in tag.find_all('a'):
            link.attrs.pop('href', None)
        return tag

    # Validate the content first so a malformed chunk fails before any image
    # lookup or DOM mutation happens.
    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(getIcon(),
                 getMeta('post-character'),
                 getMeta('post-screenname'),
                 getMeta('post-author'),
                 content)
111
112
def renderChunk(texifier: Texifier, chunk: Chunk) -> bytes:
    """Render *chunk* as TeX: a ``\\glowhead`` call followed by the content.

    ``\\glowhead`` always receives four brace-delimited arguments, in order:
    icon, character, screen name and author.  A missing (falsy) field
    yields an empty argument.  The texified content follows immediately.
    """
    if chunk.icon:
        icon = br'\glowicon{%s}' % chunk.icon.encode('UTF-8')
    else:
        icon = b''
    character = texifier.texify(chunk.character) if chunk.character else b''
    screen_name = (
        texifier.texify(chunk.screen_name) if chunk.screen_name else b'')
    author = texifier.texify(chunk.author) if chunk.author else b''
    body = texifier.texify(chunk.content)

    pieces = [
        br'\glowhead{', icon,
        b'}{', character,
        b'}{', screen_name,
        b'}{', author,
        b'}', body,
    ]
    return b''.join(pieces)
125
126
# TeX layout that typesets chunk content only: \glowhead accepts the four
# header arguments emitted by renderChunk (icon, character, screen name,
# author) and expands to nothing.
ContentOnlyLayout = br'''
\newcommand{\glowhead}[4]{}
'''
130
131
# TeX layout that stacks the header inside a framed, left-aligned wrapstuff
# box: #1 (icon) first, then #2 (character), #3 (screen name) and
# #4 (author), centred.  Each of #1-#3 is followed by a line break only
# when it is non-empty.
# NOTE(review): \wrapstuffclear, wrapstuff, varwidth and \ifnotempty are not
# defined in this file; presumably the document preamble provides them.
BelowIconLayout = br'''
\newcommand{\glowhead}[4]{\wrapstuffclear
\begin{wrapstuff}[l]
\fbox{
\begin{varwidth}{0.5\textwidth}
\smash{\parbox[t][0pt]{0pt}{
\setlength{\fboxrule}{0.2pt}
\setlength{\fboxsep}{0pt}
\vspace{-3.4pt}
\fbox{\hspace{107mm}}
}\\*}
\vspace{-1em}
\begin{center}
#1\ifnotempty
{#1}{\\*}#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
\end{varwidth}
}
\end{wrapstuff}

\strut

\noindent}'''
157
158
159 # Why is \textwidth not the width of the text?
160 # Why is the width of the text .765\textwidth?
# TeX layout that frames the header with #1 (icon) followed by a \parbox
# holding #2 (character), #3 (screen name) and #4 (author), centred.  #2 and
# #3 are each followed by a line break only when non-empty.
# NOTE(review): \ifnotempty is not defined in this file; presumably the
# document preamble provides it.
BesideIconLayout = br'''
\newcommand{\glowhead}[4]{

\strut

\noindent\fbox{
#1
\parbox[b]{.765\textwidth}{
\begin{center}
#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
}
}\\*
\vspace{-0.75em}\\*
\noindent}'''
175 }\\*
176 \vspace{-0.75em}\\*
177 \noindent}'''