git.scottworley.com Git - paperdoorknob/blame_incremental

... / ...

Commit	Line	Data
	1	# paperdoorknob: Print glowfic
	2	#
	3	# This program is free software: you can redistribute it and/or modify it
	4	# under the terms of the GNU General Public License as published by the
	5	# Free Software Foundation, version 3.
	6
	7
	8	from dataclasses import dataclass
	9	import itertools
	10	from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
	11
	12	from typing import Iterable
	13
	14	from bs4 import BeautifulSoup
	15	from bs4.element import Tag
	16
	17	from images import ImageStore
	18	from texify import Texifier
	19
	20
	21	def _removeViewFromURL(url: str) -> str:
	22	u = urlparse(url)
	23	old_qs = parse_qsl(u.query)
	24	new_qs = [(k, v) for k, v in old_qs if k != 'view']
	25	return urlunparse(u._replace(query=urlencode(new_qs)))
	26
	27
	28	def nonFlatURL(url: str) -> str:
	29	return _removeViewFromURL(url)
	30
	31
	32	def flatURL(url: str) -> str:
	33	u = urlparse(_removeViewFromURL(url))
	34	qs = parse_qsl(u.query) + [('view', 'flat')]
	35	return urlunparse(u._replace(query=urlencode(qs)))
	36
	37
@dataclass(frozen=True)
class Chunk:
    """The pieces of one chunk (a thread's first post or one of its replies).

    Instances are immutable.  The field notes below describe the values
    makeChunk in this file stores; other constructors may differ.
    """
    # Result of ImageStore.get_image for the icon <img>'s src attribute;
    # None when the chunk has no 'post-icon' div or that div has no <img>.
    icon: str | None
    # The 'post-character' div with href attributes removed from its links;
    # None when the chunk has no such div.
    character: Tag | None
    # The 'post-screenname' div, treated the same way; None when absent.
    screen_name: Tag | None
    # The 'post-author' div, treated the same way; None when absent.
    author: Tag | None
    # The 'post-content' div; always present.
    content: Tag
	45
	46	# We avoid the name "post" because the Glowfic community uses the term
	47	# inconsistently:
	48	# * The Glowfic software sometimes uses "post" to refer to a whole thread
	49	# (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
	50	# but mostly uses "post" to refer to just the first chunk in a thread
	51	# (in the HTML and UI). The non-first chunks are "replies".
	52	# * Readers and this software don't need to distinguish first-chunks and
	53	# non-first-chunks.
	54	# * Humans in the community tend to use "posts" to mean chunks.
	55
	56
	57	class Thread:
	58
	59	def __init__(self, dom: BeautifulSoup) -> None:
	60	self._dom = dom
	61
	62	def title(self) -> str \| None:
	63	span = self._dom.findChild("span", id="post-title")
	64	if not isinstance(span, Tag):
	65	return None
	66	return span.text.strip()
	67
	68	def chunkDOMs(self) -> Iterable[Tag]:
	69	def text() -> Tag:
	70	body = self._dom.body
	71	assert body
	72	text = body.find_next("div", class_="post-post")
	73	assert isinstance(text, Tag)
	74	return text
	75
	76	def the_replies() -> Iterable[Tag]:
	77	rs = self._dom.find_all("div", class_="post-reply")
	78	assert all(isinstance(r, Tag) for r in rs)
	79	return rs
	80
	81	return itertools.chain([text()], the_replies())
	82
	83
	84	def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
	85
	86	def getIcon() -> str \| None:
	87	icon_div = chunk_dom.findChild('div', class_='post-icon')
	88	if icon_div is None:
	89	return None
	90	assert isinstance(icon_div, Tag)
	91	icon_img = icon_div.findChild('img')
	92	if icon_img is None:
	93	return None
	94	assert isinstance(icon_img, Tag)
	95	return image_store.get_image(icon_img.attrs['src'])
	96
	97	def getByClass(css_class: str) -> Tag \| None:
	98	tag = chunk_dom.findChild('div', class_=css_class)
	99	assert tag is None or isinstance(tag, Tag)
	100	return tag
	101
	102	def stripHREF(tag: Tag) -> None:
	103	for c in tag.findChildren("a"):
	104	if "href" in c.attrs:
	105	del c.attrs["href"]
	106
	107	def getMeta(css_class: str) -> Tag \| None:
	108	tag = getByClass(css_class)
	109	if tag is None:
	110	return None
	111	stripHREF(tag)
	112	return tag
	113
	114	content = chunk_dom.findChild('div', class_='post-content')
	115	assert isinstance(content, Tag)
	116
	117	return Chunk(getIcon(),
	118	getMeta('post-character'),
	119	getMeta('post-screenname'),
	120	getMeta('post-author'),
	121	content)
	122
	123
	124	def renderChunk(texifier: Texifier, chunk: Chunk) -> bytes:
	125	return b''.join([
	126	br'\glowhead{',
	127	br'\glowicon{%s}' % chunk.icon.encode('UTF-8') if chunk.icon else b'',
	128	b'}{',
	129	texifier.texify(chunk.character) if chunk.character else b'',
	130	b'}{',
	131	texifier.texify(chunk.screen_name) if chunk.screen_name else b'',
	132	b'}{',
	133	texifier.texify(chunk.author) if chunk.author else b'',
	134	b'}',
	135	texifier.texify(chunk.content)])
	136
	137
# LaTeX definition of \glowhead that takes its four arguments (as emitted by
# renderChunk: icon, character, screen name, author) and expands to nothing,
# so only the chunk content that follows the header is typeset.
ContentOnlyLayout = br'''
\newcommand{\glowhead}[4]{}
'''
	141
	142
# LaTeX definition of \glowhead (arguments as emitted by renderChunk:
# #1 icon, #2 character, #3 screen name, #4 author) that sets all four,
# centered, inside a framed varwidth box of at most 0.5\textwidth placed in
# a wrapstuff environment with the [l] option.  Each of #1-#3 is followed by
# a \\* line break only when it is non-empty (\ifnotempty).
# NOTE(review): the \smash{\parbox...} group containing the 107mm-wide \fbox
# looks like it draws a thin horizontal rule; confirm its purpose before
# changing any of the lengths.
BelowIconLayout = br'''
\newcommand{\glowhead}[4]{\wrapstuffclear
\begin{wrapstuff}[l]
\fbox{
\begin{varwidth}{0.5\textwidth}
\smash{\parbox[t][0pt]{0pt}{
\setlength{\fboxrule}{0.2pt}
\setlength{\fboxsep}{0pt}
\vspace{-3.4pt}
\fbox{\hspace{107mm}}
}\\*}
\vspace{-1em}
\begin{center}
#1\ifnotempty
{#1}{\\*}#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
\end{varwidth}
}
\end{wrapstuff}

\strut

\noindent}'''
	168
	169
# LaTeX definition of \glowhead (arguments as emitted by renderChunk:
# #1 icon, #2 character, #3 screen name, #4 author) that puts #1 next to a
# \parbox holding #2-#4 centered, all inside one \fbox.  #2 and #3 are each
# followed by a \\* line break only when non-empty (\ifnotempty).
#
# The .765 factor on the \parbox width is empirical; the open questions
# below are from the original author:
# Why is \textwidth not the width of the text?
# Why is the width of the text .765\textwidth?
BesideIconLayout = br'''
\newcommand{\glowhead}[4]{

\strut

\noindent\fbox{
#1
\parbox[b]{.765\textwidth}{
\begin{center}
#2\ifnotempty
{#2}{\\*}#3\ifnotempty
{#3}{\\*}#4
\end{center}
}
}\\*
\vspace{-0.75em}\\*
\noindent}'''