git.scottworley.com Git - paperdoorknob/blame_incremental

... / ...

Commit	Line	Data
	1	# paperdoorknob: Print glowfic
	2	#
	3	# This program is free software: you can redistribute it and/or modify it
	4	# under the terms of the GNU General Public License as published by the
	5	# Free Software Foundation, version 3.
	6
	7
	8	from dataclasses import dataclass
	9	import itertools
	10	from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
	11
	12	from typing import Iterable
	13
	14	from bs4 import BeautifulSoup
	15	from bs4.element import Tag
	16
	17	from images import ImageStore
	18	from spec import Spec
	19	from texify import Texifier
	20
	21
	22	def _removeViewFromURL(url: str) -> str:
	23	u = urlparse(url)
	24	old_qs = parse_qsl(u.query)
	25	new_qs = [(k, v) for k, v in old_qs if k != 'view']
	26	return urlunparse(u._replace(query=urlencode(new_qs)))
	27
	28
	29	def nonFlatURL(url: str) -> str:
	30	return _removeViewFromURL(url)
	31
	32
	33	def flatURL(url: str) -> str:
	34	u = urlparse(_removeViewFromURL(url))
	35	qs = parse_qsl(u.query) + [('view', 'flat')]
	36	return urlunparse(u._replace(query=urlencode(qs)))
	37
	38
	39	@dataclass(frozen=True)
	40	class Chunk:
	41	icon: str \| None
	42	character: Tag \| None
	43	screen_name: Tag \| None
	44	author: Tag \| None
	45	content: Tag
	46
	47	# We avoid the name "post" because the Glowfic community uses the term
	48	# inconsistently:
	49	# * The Glowfic software sometimes uses "post" to refer to a whole thread
	50	# (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
	51	# but mostly uses "post" to refer to just the first chunk in a thread
	52	# (in the HTML and UI). The non-first chunks are "replies".
	53	# * Readers and this software don't need to distinguish first-chunks and
	54	# non-first-chunks.
	55	# * Humans in the community tend to use "posts" to mean chunks.
	56
	57
	58	class Thread:
	59
	60	def __init__(self, thing: BeautifulSoup \| Spec) -> None:
	61	if isinstance(thing, Spec):
	62	spec = thing
	63	spec.log('Fetching HTML...\r')
	64	html = spec.fetcher.fetch(flatURL(spec.url))
	65	spec.log('Parsing HTML...\r')
	66	self._dom = BeautifulSoup(spec.htmlfilter(html), 'html.parser')
	67	else:
	68	self._dom = thing
	69
	70	def title(self) -> str \| None:
	71	span = self._dom.findChild("span", id="post-title")
	72	if not isinstance(span, Tag):
	73	return None
	74	return span.text.strip()
	75
	76	def chunkDOMs(self) -> Iterable[Tag]:
	77	def text() -> Tag:
	78	body = self._dom.body
	79	assert body
	80	text = body.find_next("div", class_="post-post")
	81	assert isinstance(text, Tag)
	82	return text
	83
	84	def the_replies() -> Iterable[Tag]:
	85	rs = self._dom.find_all("div", class_="post-reply")
	86	assert all(isinstance(r, Tag) for r in rs)
	87	return rs
	88
	89	return itertools.chain([text()], the_replies())
	90
	91
	92	def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
	93
	94	def getIcon() -> str \| None:
	95	icon_div = chunk_dom.findChild('div', class_='post-icon')
	96	if icon_div is None:
	97	return None
	98	assert isinstance(icon_div, Tag)
	99	icon_img = icon_div.findChild('img')
	100	if icon_img is None:
	101	return None
	102	assert isinstance(icon_img, Tag)
	103	return image_store.get_image(icon_img.attrs['src'])
	104
	105	def getByClass(css_class: str) -> Tag \| None:
	106	tag = chunk_dom.findChild('div', class_=css_class)
	107	assert tag is None or isinstance(tag, Tag)
	108	return tag
	109
	110	def stripHREF(tag: Tag) -> None:
	111	for c in tag.findChildren("a"):
	112	if "href" in c.attrs:
	113	del c.attrs["href"]
	114
	115	def getMeta(css_class: str) -> Tag \| None:
	116	tag = getByClass(css_class)
	117	if tag is None:
	118	return None
	119	stripHREF(tag)
	120	return tag
	121
	122	content = chunk_dom.findChild('div', class_='post-content')
	123	assert isinstance(content, Tag)
	124
	125	return Chunk(getIcon(),
	126	getMeta('post-character'),
	127	getMeta('post-screenname'),
	128	getMeta('post-author'),
	129	content)
	130
	131
	132	def renderChunk(texifier: Texifier, chunk: Chunk) -> bytes:
	133	return b''.join([
	134	br'\glowhead{',
	135	br'\glowicon{%s}' % chunk.icon.encode('UTF-8') if chunk.icon else b'',
	136	b'}{',
	137	texifier.texify(chunk.character) if chunk.character else b'',
	138	b'}{',
	139	texifier.texify(chunk.screen_name) if chunk.screen_name else b'',
	140	b'}{',
	141	texifier.texify(chunk.author) if chunk.author else b'',
	142	b'}',
	143	texifier.texify(chunk.content)])
	144
	145
# TeX preamble fragment: defines \glowhead as a four-argument macro that
# expands to nothing, so the header arguments renderChunk emits (icon,
# character, screen name, author) are discarded and only the content remains.
ContentOnlyLayout = br'''
\newcommand{\glowhead}[4]{}
'''
	149
	150
# TeX preamble fragment: defines \glowhead so that the icon (#1), character
# (#2), screen name (#3) and author (#4) are stacked and centered inside a
# framed varwidth box of at most 0.5\textwidth, placed in a left-side
# wrapstuff environment.  Each of #1-#3 is followed by a \\* line break
# via \ifnotempty (presumably only when that argument is non-empty).
# Assumes wrapstuff, varwidth and \ifnotempty are provided elsewhere in the
# generated document -- TODO confirm.
# NOTE(review): the smashed zero-size parbox holding a 107mm-wide \fbox looks
# like it draws a thin rule across the top of the chunk -- confirm.
BelowIconLayout = br'''
\newcommand{\glowhead}[4]{\wrapstuffclear
\begin{wrapstuff}[l]
\fbox{
\begin{varwidth}{0.5\textwidth}
\smash{\parbox[t][0pt]{0pt}{
\setlength{\fboxrule}{0.2pt}
\setlength{\fboxsep}{0pt}
\vspace{-3.4pt}
\fbox{\hspace{107mm}}
}\\*}
\vspace{-1em}
\begin{center}
#1\ifnotempty
{#1}{\\*}#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
\end{varwidth}
}
\end{wrapstuff}

\strut

\noindent}'''
	176
	177
# TeX preamble fragment: defines \glowhead so that the icon (#1) and a
# bottom-aligned parbox holding the centered character (#2), screen name (#3)
# and author (#4) share one \fbox.  #2 and #3 are each followed by a \\*
# line break via \ifnotempty (presumably only when that argument is
# non-empty; \ifnotempty is assumed to be defined elsewhere -- TODO confirm).
#
# Why is \textwidth not the width of the text?
# Why is the width of the text .765\textwidth?
BesideIconLayout = br'''
\newcommand{\glowhead}[4]{

\strut

\noindent\fbox{
#1
\parbox[b]{.765\textwidth}{
\begin{center}
#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
}
}\\*
\vspace{-0.75em}\\*
\noindent}'''