git.scottworley.com Git - paperdoorknob/blob - glowfic.py

   1 # paperdoorknob: Print glowfic
   2 #
   3 # This program is free software: you can redistribute it and/or modify it
   4 # under the terms of the GNU General Public License as published by the
   5 # Free Software Foundation, version 3.
   6
   7
   8 from dataclasses import dataclass
   9 import itertools
  10 from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
  11
  12 from typing import Iterable
  13
  14 from bs4 import BeautifulSoup
  15 from bs4.element import Tag
  16
  17 from images import ImageStore
  18 from spec import Spec
  19 from texify import Texifier
  20
  21
  22 def _removeViewFromURL(url: str) -> str:
  23     u = urlparse(url)
  24     old_qs = parse_qsl(u.query)
  25     new_qs = [(k, v) for k, v in old_qs if k != 'view']
  26     return urlunparse(u._replace(query=urlencode(new_qs)))
  27
  28
  29 def nonFlatURL(url: str) -> str:
  30     return _removeViewFromURL(url)
  31
  32
  33 def flatURL(url: str) -> str:
  34     u = urlparse(_removeViewFromURL(url))
  35     qs = parse_qsl(u.query) + [('view', 'flat')]
  36     return urlunparse(u._replace(query=urlencode(qs)))
  37
  38
@dataclass(frozen=True)
class Chunk:
    """The pieces of one chunk of a thread: header metadata plus content.

    Instances are immutable (frozen dataclass).  Every header field is
    None when the corresponding element is missing from the chunk's HTML.
    """
    # Value returned by ImageStore.get_image() for the icon <img>'s src,
    # or None when the chunk has no icon div or no <img> inside it.
    icon: str | None
    # The chunk's div with class 'post-character', if any.
    character: Tag | None
    # The chunk's div with class 'post-screenname', if any.
    screen_name: Tag | None
    # The chunk's div with class 'post-author', if any.
    author: Tag | None
    # The chunk's div with class 'post-content'; always present.
    content: Tag
  46
  47 # We avoid the name "post" because the Glowfic community uses the term
  48 # inconsistently:
  49 #  * The Glowfic software sometimes uses "post" to refer to a whole thread
  50 #    (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
  51 #    but mostly uses "post" to refer to just the first chunk in a thread
  52 #    (in the HTML and UI).  The non-first chunks are "replies".
  53 #  * Readers and this software don't need to distinguish first-chunks and
  54 #    non-first-chunks.
  55 #  * Humans in the community tend to use "posts" to mean chunks.
  56
  57
  58 class Thread:
  59
  60     def __init__(self, spec: Spec) -> None:
  61         spec.log('Fetching HTML...\r')
  62         html = spec.fetcher.fetch(flatURL(spec.url))
  63         spec.log('Parsing HTML...\r')
  64         self._dom = BeautifulSoup(spec.htmlfilter(html), 'html.parser')
  65
  66     def title(self) -> str | None:
  67         span = self._dom.findChild("span", id="post-title")
  68         if not isinstance(span, Tag):
  69             return None
  70         return span.text.strip()
  71
  72     def chunkDOMs(self) -> Iterable[Tag]:
  73         def text() -> Tag:
  74             body = self._dom.body
  75             assert body
  76             text = body.find_next("div", class_="post-post")
  77             assert isinstance(text, Tag)
  78             return text
  79
  80         def the_replies() -> Iterable[Tag]:
  81             rs = self._dom.find_all("div", class_="post-reply")
  82             assert all(isinstance(r, Tag) for r in rs)
  83             return rs
  84
  85         return itertools.chain([text()], the_replies())
  86
  87
  88 def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
  89
  90     def getIcon() -> str | None:
  91         icon_div = chunk_dom.findChild('div', class_='post-icon')
  92         if icon_div is None:
  93             return None
  94         assert isinstance(icon_div, Tag)
  95         icon_img = icon_div.findChild('img')
  96         if icon_img is None:
  97             return None
  98         assert isinstance(icon_img, Tag)
  99         return image_store.get_image(icon_img.attrs['src'])
 100
 101     def getByClass(css_class: str) -> Tag | None:
 102         tag = chunk_dom.findChild('div', class_=css_class)
 103         assert tag is None or isinstance(tag, Tag)
 104         return tag
 105
 106     def stripHREF(tag: Tag) -> None:
 107         for c in tag.findChildren("a"):
 108             if "href" in c.attrs:
 109                 del c.attrs["href"]
 110
 111     def getMeta(css_class: str) -> Tag | None:
 112         tag = getByClass(css_class)
 113         if tag is None:
 114             return None
 115         stripHREF(tag)
 116         return tag
 117
 118     content = chunk_dom.findChild('div', class_='post-content')
 119     assert isinstance(content, Tag)
 120
 121     return Chunk(getIcon(),
 122                  getMeta('post-character'),
 123                  getMeta('post-screenname'),
 124                  getMeta('post-author'),
 125                  content)
 126
 127
 128 def renderChunk(texifier: Texifier, chunk: Chunk) -> bytes:
 129     return b''.join([
 130         br'\glowhead{',
 131         br'\glowicon{%s}' % chunk.icon.encode('UTF-8') if chunk.icon else b'',
 132         b'}{',
 133         texifier.texify(chunk.character) if chunk.character else b'',
 134         b'}{',
 135         texifier.texify(chunk.screen_name) if chunk.screen_name else b'',
 136         b'}{',
 137         texifier.texify(chunk.author) if chunk.author else b'',
 138         b'}',
 139         texifier.texify(chunk.content)])
 140
 141
# LaTeX preamble fragment that defines \glowhead as a four-argument macro
# expanding to nothing, so the header emitted by renderChunk (icon,
# character, screen name, author) produces no output.
ContentOnlyLayout = br'''
\newcommand{\glowhead}[4]{}
'''
 145
 146
# LaTeX preamble fragment that defines \glowhead so the four header
# arguments emitted by renderChunk (#1 icon, #2 character, #3 screen name,
# #4 author) are stacked, centered, inside a framed box at most half of
# \textwidth wide, placed in a left-side wrapstuff environment.  Each of
# #1-#3 is followed by a line break only when it is non-empty.
# \wrapstuffclear, wrapstuff, varwidth and \ifnotempty are not defined in
# this file; presumably the document preamble provides them -- verify.
# NOTE(review): the \smash'd inner \fbox around a 107mm \hspace looks like
# it draws a thin horizontal rule above the box -- confirm.
BelowIconLayout = br'''
\newcommand{\glowhead}[4]{\wrapstuffclear
\begin{wrapstuff}[l]
\fbox{
\begin{varwidth}{0.5\textwidth}
  \smash{\parbox[t][0pt]{0pt}{
    \setlength{\fboxrule}{0.2pt}
    \setlength{\fboxsep}{0pt}
    \vspace{-3.4pt}
    \fbox{\hspace{107mm}}
  }\\*}
  \vspace{-1em}
\begin{center}
#1\ifnotempty
{#1}{\\*}#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
\end{varwidth}
}
\end{wrapstuff}

\strut

\noindent}'''
 172
 173
# LaTeX preamble fragment that defines \glowhead so the icon (#1) sits in
# a framed box next to a bottom-aligned \parbox holding the centered
# character (#2), screen name (#3) and author (#4).  Each of #2 and #3 is
# followed by a line break only when it is non-empty.
# \ifnotempty is not defined in this file; presumably the document
# preamble provides it -- verify.
#
# Why is \textwidth not the width of the text?
# Why is the width of the text .765\textwidth?
BesideIconLayout = br'''
\newcommand{\glowhead}[4]{

\strut

\noindent\fbox{
#1
\parbox[b]{.765\textwidth}{
\begin{center}
#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
}
}\\*
\vspace{-0.75em}\\*
\noindent}'''