git.scottworley.com Git - paperdoorknob/blame_incremental

... / ...

Commit	Line	Data
	1	# paperdoorknob: Print glowfic
	2	#
	3	# This program is free software: you can redistribute it and/or modify it
	4	# under the terms of the GNU General Public License as published by the
	5	# Free Software Foundation, version 3.
	6
	7
	8	from dataclasses import dataclass
	9	import itertools
	10	from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
	11
	12	from typing import Iterable
	13
	14	from bs4 import BeautifulSoup
	15	from bs4.element import Tag
	16
	17	from images import ImageStore
	18	from spec import Spec
	19	from texify import Texifier
	20
	21
	22	def _removeViewFromURL(url: str) -> str:
	23	u = urlparse(url)
	24	old_qs = parse_qsl(u.query)
	25	new_qs = [(k, v) for k, v in old_qs if k != 'view']
	26	return urlunparse(u._replace(query=urlencode(new_qs)))
	27
	28
	29	def nonFlatURL(url: str) -> str:
	30	return _removeViewFromURL(url)
	31
	32
	33	def flatURL(url: str) -> str:
	34	u = urlparse(_removeViewFromURL(url))
	35	qs = parse_qsl(u.query) + [('view', 'flat')]
	36	return urlunparse(u._replace(query=urlencode(qs)))
	37
	38
	39	@dataclass(frozen=True)
	40	class Chunk:
	41	icon: str \| None
	42	character: Tag \| None
	43	screen_name: Tag \| None
	44	author: Tag \| None
	45	content: Tag
	46
	47	# We avoid the name "post" because the Glowfic community uses the term
	48	# inconsistently:
	49	# * The Glowfic software sometimes uses "post" to refer to a whole thread
	50	# (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
	51	# but mostly uses "post" to refer to just the first chunk in a thread
	52	# (in the HTML and UI). The non-first chunks are "replies".
	53	# * Readers and this software don't need to distinguish first-chunks and
	54	# non-first-chunks.
	55	# * Humans in the community tend to use "posts" to mean chunks.
	56
	57
	58	class Thread:
	59
	60	def __init__(self, spec: Spec) -> None:
	61	spec.log('Fetching HTML...\r')
	62	html = spec.fetcher.fetch(flatURL(spec.url))
	63	spec.log('Parsing HTML...\r')
	64	self._dom = BeautifulSoup(spec.htmlfilter(html), 'html.parser')
	65
	66	def title(self) -> str \| None:
	67	span = self._dom.findChild("span", id="post-title")
	68	if not isinstance(span, Tag):
	69	return None
	70	return span.text.strip()
	71
	72	def chunkDOMs(self) -> Iterable[Tag]:
	73	def text() -> Tag:
	74	body = self._dom.body
	75	assert body
	76	text = body.find_next("div", class_="post-post")
	77	assert isinstance(text, Tag)
	78	return text
	79
	80	def the_replies() -> Iterable[Tag]:
	81	rs = self._dom.find_all("div", class_="post-reply")
	82	assert all(isinstance(r, Tag) for r in rs)
	83	return rs
	84
	85	return itertools.chain([text()], the_replies())
	86
	87
	88	def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
	89
	90	def getIcon() -> str \| None:
	91	icon_div = chunk_dom.findChild('div', class_='post-icon')
	92	if icon_div is None:
	93	return None
	94	assert isinstance(icon_div, Tag)
	95	icon_img = icon_div.findChild('img')
	96	if icon_img is None:
	97	return None
	98	assert isinstance(icon_img, Tag)
	99	return image_store.get_image(icon_img.attrs['src'])
	100
	101	def getByClass(css_class: str) -> Tag \| None:
	102	tag = chunk_dom.findChild('div', class_=css_class)
	103	assert tag is None or isinstance(tag, Tag)
	104	return tag
	105
	106	def stripHREF(tag: Tag) -> None:
	107	for c in tag.findChildren("a"):
	108	if "href" in c.attrs:
	109	del c.attrs["href"]
	110
	111	def getMeta(css_class: str) -> Tag \| None:
	112	tag = getByClass(css_class)
	113	if tag is None:
	114	return None
	115	stripHREF(tag)
	116	return tag
	117
	118	content = chunk_dom.findChild('div', class_='post-content')
	119	assert isinstance(content, Tag)
	120
	121	return Chunk(getIcon(),
	122	getMeta('post-character'),
	123	getMeta('post-screenname'),
	124	getMeta('post-author'),
	125	content)
	126
	127
	128	def renderChunk(texifier: Texifier, chunk: Chunk) -> bytes:
	129	return b''.join([
	130	br'\glowhead{',
	131	br'\glowicon{%s}' % chunk.icon.encode('UTF-8') if chunk.icon else b'',
	132	b'}{',
	133	texifier.texify(chunk.character) if chunk.character else b'',
	134	b'}{',
	135	texifier.texify(chunk.screen_name) if chunk.screen_name else b'',
	136	b'}{',
	137	texifier.texify(chunk.author) if chunk.author else b'',
	138	b'}',
	139	texifier.texify(chunk.content)])
	140
	141
# LaTeX definition of \glowhead for the content-only layout: a
# four-argument macro that expands to nothing.  The header emitted by
# renderChunk (icon, character, screen name, author) is therefore
# discarded and only the chunk content is typeset.
ContentOnlyLayout = br'''
\newcommand{\glowhead}[4]{}
'''
	145
	146
# LaTeX definition of \glowhead for the below-icon layout.  The four
# arguments (as emitted by renderChunk: #1 icon, #2 character, #3 screen
# name, #4 author) are stacked as centered lines inside a framed varwidth
# box of at most 0.5\textwidth, placed in a left-side ([l]) wrapstuff
# environment.  Each of #1-#3 is followed by a \\* line break only when it
# is non-empty (\ifnotempty).
# NOTE(review): \wrapstuffclear, wrapstuff, varwidth and \ifnotempty are
# not defined here; presumably they come from packages/macros loaded in
# the document preamble -- confirm.
# NOTE(review): the \smash{\parbox...} block appears to add a zero-height,
# 107mm-wide thin-ruled box at the top of the frame; its exact purpose is
# not evident from this file.
BelowIconLayout = br'''
\newcommand{\glowhead}[4]{\wrapstuffclear
\begin{wrapstuff}[l]
\fbox{
\begin{varwidth}{0.5\textwidth}
\smash{\parbox[t][0pt]{0pt}{
\setlength{\fboxrule}{0.2pt}
\setlength{\fboxsep}{0pt}
\vspace{-3.4pt}
\fbox{\hspace{107mm}}
}\\*}
\vspace{-1em}
\begin{center}
#1\ifnotempty
{#1}{\\*}#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
\end{varwidth}
}
\end{wrapstuff}

\strut

\noindent}'''
	172
	173
# LaTeX definition of \glowhead for the beside-icon layout.  A single
# framed box holds #1 (the icon, as emitted by renderChunk) followed by a
# bottom-aligned \parbox in which #2 (character), #3 (screen name) and
# #4 (author) are centered on separate lines; #2 and #3 are followed by a
# \\* line break only when non-empty (\ifnotempty).
# NOTE(review): \ifnotempty is not defined here; presumably it is provided
# by the document preamble -- confirm.
#
# The .765\textwidth below is an empirically chosen width (open questions
# from the original author):
# Why is \textwidth not the width of the text?
# Why is the width of the text .765\textwidth?
BesideIconLayout = br'''
\newcommand{\glowhead}[4]{

\strut

\noindent\fbox{
#1
\parbox[b]{.765\textwidth}{
\begin{center}
#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
}
}\\*
\vspace{-0.75em}\\*
\noindent}'''
	188	\end{center}
	189	}
	190	}\\*
	191	\vspace{-0.75em}\\*
	192	\noindent}'''