1 # paperdoorknob: Print glowfic
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
10 from typing
import Iterable
12 from bs4
import BeautifulSoup
13 from bs4
.element
import Tag
15 from args
import spec_from_commandline_args
def parse(content: bytes) -> BeautifulSoup:
    """Build a BeautifulSoup tree from the raw bytes of a page.

    Args:
        content: The undecoded markup to parse.

    Returns:
        The BeautifulSoup object constructed from ``content`` with the
        'html.parser' backend selected explicitly.
    """
    soup = BeautifulSoup(content, features='html.parser')
    return soup
23 def clean(html
: BeautifulSoup
) -> BeautifulSoup
:
24 for eb
in html
.find_all("div", class_
="post-edit-box"):
26 for footer
in html
.find_all("div", class_
="post-footer"):
31 def replies(html
: BeautifulSoup
) -> Iterable
[Tag
]:
35 text
= body
.find_next("div", class_
="post-post")
36 assert isinstance(text
, Tag
)
39 def the_replies() -> Iterable
[Tag
]:
40 rs
= html
.find_all("div", class_
="post-reply")
41 assert all(isinstance(r
, Tag
) for r
in rs
)
44 return itertools
.chain([text()], the_replies())
def process(spec: Spec) -> None:
    """Emit a LaTeX article for the page named by ``spec`` onto ``spec.texout``.

    The output is a fixed preamble, then the texified form of every element
    yielded by ``replies`` for the fetched page, then a fixed closing line.

    Args:
        spec: Supplies the ``url`` to fetch, the ``fetcher`` that retrieves
            it, the ``htmlfilter`` applied to the fetched data before
            parsing, the ``texifier`` that converts each element, and the
            ``texout`` sink that receives the bytes.
    """
    preamble = b'\\documentclass{article}\n\\begin{document}\n'
    postamble = b'\\end{document}\n'

    spec.texout.write(preamble)

    # Fetch -> filter -> parse -> clean, one named step at a time.
    filtered = spec.htmlfilter(spec.fetcher.fetch(spec.url))
    page = clean(parse(filtered))

    for post in replies(page):
        rendered = spec.texifier.texify(post)
        spec.texout.write(rendered)

    spec.texout.write(postamble)
56 with spec_from_commandline_args() as spec
:
60 if __name__
== '__main__':