git.scottworley.com Git - paperdoorknob/blob - glowfic.py

   1 # paperdoorknob: Print glowfic
   2 #
   3 # This program is free software: you can redistribute it and/or modify it
   4 # under the terms of the GNU General Public License as published by the
   5 # Free Software Foundation, version 3.
   6
   7
   8 from dataclasses import dataclass
   9 import itertools
  10
  11 from typing import Iterable
  12
  13 from bs4 import BeautifulSoup
  14 from bs4.element import Tag
  15
  16 from images import ImageStore
  17
  18
@dataclass(frozen=True)
class Chunk:
    """One entry of a thread (the opening post or a reply), as built by makeChunk.

    Instances are immutable (frozen dataclass).  The four optional fields
    are None when makeChunk does not find the corresponding element.
    """
    # Value returned by ImageStore.get_image for the icon <img>'s src.
    icon: str | None
    # Stripped text of the div with class "post-character".
    character: str | None
    # Stripped text of the div with class "post-screenname".
    screen_name: str | None
    # Stripped text of the div with class "post-author".
    author: str | None
    # The div with class "post-content", kept as a parsed element.
    content: Tag
  26
  27 # We avoid the name "post" because the Glowfic community uses the term
  28 # inconsistently:
  29 #  * The Glowfic software sometimes uses "post" to refer to a whole thread
  30 #    (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
  31 #    but mostly uses "post" to refer to just the first chunk in a thread
  32 #    (in the HTML and UI).  The non-first chunks are "replies".
  33 #  * Readers and this software don't need to distinguish first-chunks and
  34 #    non-first-chunks.
  35 #  * Humans in the community tend to use "posts" to mean chunks.
  36
  37
  38 def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
  39     def text() -> Tag:
  40         body = html.body
  41         assert body
  42         text = body.find_next("div", class_="post-post")
  43         assert isinstance(text, Tag)
  44         return text
  45
  46     def the_replies() -> Iterable[Tag]:
  47         rs = html.find_all("div", class_="post-reply")
  48         assert all(isinstance(r, Tag) for r in rs)
  49         return rs
  50
  51     return itertools.chain([text()], the_replies())
  52
  53
  54 def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
  55
  56     def getIcon() -> str | None:
  57         icon_div = chunk_dom.find_next('div', class_='post-icon')
  58         if icon_div is None:
  59             return None
  60         icon_img = icon_div.find_next('img')
  61         if icon_img is None:
  62             return None
  63         assert isinstance(icon_img, Tag)
  64         return image_store.get_image(icon_img.attrs['src'])
  65
  66     def getTextByClass(css_class: str) -> str | None:
  67         div = chunk_dom.find_next('div', class_=css_class)
  68         if div is None:
  69             return None
  70         return div.text.strip()
  71
  72     content = chunk_dom.find_next('div', class_='post-content')
  73     assert isinstance(content, Tag)
  74
  75     return Chunk(getIcon(),
  76                  getTextByClass('post-character'),
  77                  getTextByClass('post-screenname'),
  78                  getTextByClass('post-author'),
  79                  content)