git.scottworley.com Git - paperdoorknob/blob - glowfic.py

   1 # paperdoorknob: Print glowfic
   2 #
   3 # This program is free software: you can redistribute it and/or modify it
   4 # under the terms of the GNU General Public License as published by the
   5 # Free Software Foundation, version 3.
   6
   7
   8 from dataclasses import dataclass
   9 import itertools
  10 from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
  11
  12 from typing import Iterable
  13
  14 from bs4 import BeautifulSoup
  15 from bs4.element import Tag
  16
  17 from images import ImageStore
  18 from spec import Spec
  19 from texify import Texifier
  20
  21
  22 def _removeViewFromURL(url: str) -> str:
  23     u = urlparse(url)
  24     old_qs = parse_qsl(u.query)
  25     new_qs = [(k, v) for k, v in old_qs if k != 'view']
  26     return urlunparse(u._replace(query=urlencode(new_qs)))
  27
  28
  29 def nonFlatURL(url: str) -> str:
  30     return _removeViewFromURL(url)
  31
  32
  33 def flatURL(url: str) -> str:
  34     u = urlparse(_removeViewFromURL(url))
  35     qs = parse_qsl(u.query) + [('view', 'flat')]
  36     return urlunparse(u._replace(query=urlencode(qs)))
  37
  38
@dataclass(frozen=True)
class Chunk:
    """One chunk of a thread: header metadata plus the content element.

    Instances are immutable (frozen).  In this file they are built by
    makeChunk and consumed by renderChunk.
    """
    # Result of ImageStore.get_image() for the icon <img>'s src, or None
    # when the chunk has no icon div or no <img> inside it.
    icon: str | None
    # The 'post-character' div, or None if the chunk has none.
    character: Tag | None
    # The 'post-screenname' div, or None if the chunk has none.
    screen_name: Tag | None
    # The 'post-author' div, or None if the chunk has none.
    author: Tag | None
    # The 'post-content' div; always present.
    content: Tag
  46
  47 # We avoid the name "post" because the Glowfic community uses the term
  48 # inconsistently:
  49 #  * The Glowfic software sometimes uses "post" to refer to a whole thread
  50 #    (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
  51 #    but mostly uses "post" to refer to just the first chunk in a thread
  52 #    (in the HTML and UI).  The non-first chunks are "replies".
  53 #  * Readers and this software don't need to distinguish first-chunks and
  54 #    non-first-chunks.
  55 #  * Humans in the community tend to use "posts" to mean chunks.
  56
  57
  58 class Thread:
  59
  60     def __init__(self, thing: BeautifulSoup | Spec) -> None:
  61         if isinstance(thing, Spec):
  62             spec = thing
  63             spec.log('Fetching HTML...\r')
  64             html = spec.fetcher.fetch(flatURL(spec.url))
  65             spec.log('Parsing HTML...\r')
  66             self._dom = BeautifulSoup(spec.htmlfilter(html), 'html.parser')
  67         else:
  68             self._dom = thing
  69
  70     def title(self) -> str | None:
  71         span = self._dom.findChild("span", id="post-title")
  72         if not isinstance(span, Tag):
  73             return None
  74         return span.text.strip()
  75
  76     def chunkDOMs(self) -> Iterable[Tag]:
  77         def text() -> Tag:
  78             body = self._dom.body
  79             assert body
  80             text = body.find_next("div", class_="post-post")
  81             assert isinstance(text, Tag)
  82             return text
  83
  84         def the_replies() -> Iterable[Tag]:
  85             rs = self._dom.find_all("div", class_="post-reply")
  86             assert all(isinstance(r, Tag) for r in rs)
  87             return rs
  88
  89         return itertools.chain([text()], the_replies())
  90
  91
  92 def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
  93
  94     def getIcon() -> str | None:
  95         icon_div = chunk_dom.findChild('div', class_='post-icon')
  96         if icon_div is None:
  97             return None
  98         assert isinstance(icon_div, Tag)
  99         icon_img = icon_div.findChild('img')
 100         if icon_img is None:
 101             return None
 102         assert isinstance(icon_img, Tag)
 103         return image_store.get_image(icon_img.attrs['src'])
 104
 105     def getByClass(css_class: str) -> Tag | None:
 106         tag = chunk_dom.findChild('div', class_=css_class)
 107         assert tag is None or isinstance(tag, Tag)
 108         return tag
 109
 110     def stripHREF(tag: Tag) -> None:
 111         for c in tag.findChildren("a"):
 112             if "href" in c.attrs:
 113                 del c.attrs["href"]
 114
 115     def getMeta(css_class: str) -> Tag | None:
 116         tag = getByClass(css_class)
 117         if tag is None:
 118             return None
 119         stripHREF(tag)
 120         return tag
 121
 122     content = chunk_dom.findChild('div', class_='post-content')
 123     assert isinstance(content, Tag)
 124
 125     return Chunk(getIcon(),
 126                  getMeta('post-character'),
 127                  getMeta('post-screenname'),
 128                  getMeta('post-author'),
 129                  content)
 130
 131
 132 def renderChunk(texifier: Texifier, chunk: Chunk) -> bytes:
 133     return b''.join([
 134         br'\glowhead{',
 135         br'\glowicon{%s}' % chunk.icon.encode('UTF-8') if chunk.icon else b'',
 136         b'}{',
 137         texifier.texify(chunk.character) if chunk.character else b'',
 138         b'}{',
 139         texifier.texify(chunk.screen_name) if chunk.screen_name else b'',
 140         b'}{',
 141         texifier.texify(chunk.author) if chunk.author else b'',
 142         b'}',
 143         texifier.texify(chunk.content)])
 144
 145
# LaTeX preamble fragment: defines \glowhead as a four-argument command
# that expands to nothing, so the header renderChunk emits (icon,
# character, screen name, author) produces no output and only the chunk
# content remains.
ContentOnlyLayout = br'''
\newcommand{\glowhead}[4]{}
'''
 149
 150
# LaTeX preamble fragment: defines \glowhead so that the four header
# arguments (#1 icon, #2 character, #3 screen name, #4 author, in the
# order renderChunk emits them) are stacked and centered in a framed box
# at most half a \textwidth wide, placed in a left-side wrapstuff
# environment.  A line break follows each of #1-#3 only when that
# argument is non-empty (\ifnotempty).
# The \smash'd zero-size \parbox appears to draw a thin 107mm-wide rule
# above the box -- TODO confirm intent.
# NOTE(review): wrapstuff, varwidth and \ifnotempty are not defined in
# this file; presumably the document preamble loads them -- confirm.
BelowIconLayout = br'''
\newcommand{\glowhead}[4]{\wrapstuffclear
\begin{wrapstuff}[l]
\fbox{
\begin{varwidth}{0.5\textwidth}
  \smash{\parbox[t][0pt]{0pt}{
    \setlength{\fboxrule}{0.2pt}
    \setlength{\fboxsep}{0pt}
    \vspace{-3.4pt}
    \fbox{\hspace{107mm}}
  }\\*}
  \vspace{-1em}
\begin{center}
#1\ifnotempty
{#1}{\\*}#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
\end{varwidth}
}
\end{wrapstuff}

\strut

\noindent}'''
 176
 177
# LaTeX preamble fragment: defines \glowhead as a framed box holding the
# icon (#1) followed by a bottom-aligned \parbox in which the character
# (#2), screen name (#3) and author (#4) are stacked and centered.  A
# line break follows #2 and #3 only when that argument is non-empty
# (\ifnotempty).
# NOTE(review): \ifnotempty is not defined in this file; presumably the
# document preamble provides it -- confirm.
#
# Why is \textwidth not the width of the text?
# Why is the width of the text .765\textwidth?
BesideIconLayout = br'''
\newcommand{\glowhead}[4]{

\strut

\noindent\fbox{
#1
\parbox[b]{.765\textwidth}{
\begin{center}
#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
}
}\\*
\vspace{-0.75em}\\*
\noindent}'''