]> git.scottworley.com Git - paperdoorknob/blob - glowfic.py
No indent on first paragraph in each chunk
[paperdoorknob] / glowfic.py
1 # paperdoorknob: Print glowfic
2 #
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
6
7
8 from abc import ABC, abstractmethod
9 from dataclasses import dataclass
10 import itertools
11
12 from typing import Iterable
13
14 from bs4 import BeautifulSoup
15 from bs4.element import Tag
16
17 from images import ImageStore
18 from texify import Texifier
19
20
@dataclass(frozen=True)
class Chunk:
    """Immutable record of one chunk of a thread: its metadata and content.

    The metadata fields are optional; ``makeChunk`` fills each with None
    when it finds no corresponding element.
    """
    # Value returned by ImageStore.get_image for the chunk's icon, or None.
    icon: str | None
    # Stripped text of the 'post-character' div, or None.
    character: str | None
    # Stripped text of the 'post-screenname' div, or None.
    screen_name: str | None
    # Stripped text of the 'post-author' div, or None.
    author: str | None
    # The 'post-content' div holding the chunk's body.
    content: Tag
28
29 # We avoid the name "post" because the Glowfic community uses the term
30 # inconsistently:
31 # * The Glowfic software sometimes uses "post" to refer to a whole thread
32 # (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
33 # but mostly uses "post" to refer to just the first chunk in a thread
34 # (in the HTML and UI). The non-first chunks are "replies".
35 # * Readers and this software don't need to distinguish first-chunks and
36 # non-first-chunks.
37 # * Humans in the community tend to use "posts" to mean chunks.
38
39
def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
    """Return the DOM elements of every chunk in a parsed thread page.

    The first element is the 'post-post' div located from the document
    body; it is followed by every 'post-reply' div in the document.

    Raises:
        AssertionError: If the document has no body, or no 'post-post'
            div can be found.
    """
    page_body = html.body
    assert page_body
    first_chunk = page_body.find_next("div", class_="post-post")
    assert isinstance(first_chunk, Tag)

    replies = html.find_all("div", class_="post-reply")
    assert all(isinstance(reply, Tag) for reply in replies)

    return itertools.chain([first_chunk], replies)
54
55
def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
    """Build a Chunk from the DOM subtree of a single post or reply.

    Every lookup is done with ``find`` so that it is confined to the
    descendants of ``chunk_dom``.  ``find_next`` keeps scanning the rest of
    the document, so a chunk without an icon, character, screen name or
    author would silently inherit the one belonging to a later chunk.

    Args:
        chunk_dom: The element for one chunk (e.g. as produced by
            chunkDOMs).
        image_store: Its ``get_image`` is called with the icon's ``src``
            attribute; the result is stored in ``Chunk.icon``.

    Returns:
        The populated Chunk.  Metadata fields are None when the
        corresponding element is absent from this chunk.

    Raises:
        AssertionError: If the chunk contains no 'post-content' div.
    """

    def getIcon() -> str | None:
        icon_div = chunk_dom.find('div', class_='post-icon')
        if icon_div is None:
            return None
        # Narrow the type so the nested lookup is Tag.find, not str.find.
        assert isinstance(icon_div, Tag)
        icon_img = icon_div.find('img')
        if icon_img is None:
            return None
        assert isinstance(icon_img, Tag)
        return image_store.get_image(icon_img.attrs['src'])

    def getTextByClass(css_class: str) -> str | None:
        div = chunk_dom.find('div', class_=css_class)
        if div is None:
            return None
        return div.text.strip()

    content = chunk_dom.find('div', class_='post-content')
    assert isinstance(content, Tag)

    return Chunk(getIcon(),
                 getTextByClass('post-character'),
                 getTextByClass('post-screenname'),
                 getTextByClass('post-author'),
                 content)
82
83
84 def renderIcon(icon_path: str | None, image_size: float) -> str | None:
85 params = f'width={image_size}mm,height={image_size}mm,keepaspectratio'
86 return f'\\includegraphics[{params}]{{{icon_path}}}' if icon_path else None
87
88
class Layout(ABC):
    """Abstract interface for rendering a single Chunk to output bytes."""

    @abstractmethod
    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the rendered bytes for ``chunk``; subclasses must override."""
        raise NotImplementedError
94
95
class ContentOnlyLayout(Layout):
    """Layout that emits only the texified content, ignoring all metadata."""

    def __init__(self, texifier: Texifier) -> None:
        """Remember the texifier used to convert chunk content."""
        self._texifier = texifier

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the texified content of ``chunk`` followed by a newline."""
        rendered = self._texifier.texify(chunk.content)
        return rendered + b'\n'
103
104
class BelowIconLayout(Layout):
    """Layout that emits a framed metadata box followed by the content.

    The box is a left-placed ``wrapstuff`` environment containing the
    icon, character, screen name and author (whichever are present),
    centred and separated by TeX line breaks.  The texified content
    follows after a ``\\strut`` paragraph, starting with ``\\noindent``.
    """

    def __init__(self, texifier: Texifier, image_size: float) -> None:
        """Store the texifier and the icon size handed to renderIcon.

        Args:
            texifier: Converts the chunk's content to TeX bytes.
            image_size: Icon width and height; renderIcon formats it
                in millimetres.
        """
        self._texifier = texifier
        self._image_size = image_size

    def renderChunk(self, chunk: Chunk) -> bytes:
        """Return the TeX for ``chunk``: the metadata box, then the content."""
        # Candidate lines for the metadata box, in display order; entries
        # that are None are dropped when the lines are joined below.
        meta = [
            renderIcon(chunk.icon, self._image_size),
            chunk.character,
            chunk.screen_name,
            chunk.author,
        ]

        # NOTE(review): the metadata strings are UTF-8 encoded and inserted
        # into the template verbatim; TeX special characters in a character,
        # screen name or author are not escaped here -- confirm that is
        # handled elsewhere or is acceptable.
        return b'''\\wrapstuffclear
\\begin{wrapstuff}[l]
\\fbox{
\\begin{varwidth}{0.5\\textwidth}
\\smash{\\parbox[t][0pt]{0pt}{
\\setlength{\\fboxrule}{0.2pt}
\\setlength{\\fboxsep}{0pt}
\\vspace{-3.4pt}
\\fbox{\\hspace{107mm}}
}\\\\*}
\\vspace{-1em}
\\begin{center}
%s
\\end{center}
\\end{varwidth}
}
\\end{wrapstuff}

\\strut

\\noindent %s
''' % (
            b'\\\\*'.join(x.encode('UTF-8') for x in meta if x is not None),
            self._texifier.texify(chunk.content))