]>
git.scottworley.com Git - paperdoorknob/blob - paperdoorknob.py
a31b659d2d403c67e0a44d4e32ebfd218168cc1d
1 # paperdoorknob: Print glowfic
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
8 from argparse
import ArgumentParser
12 from typing
import IO
, Iterable
14 from bs4
import BeautifulSoup
15 from bs4
.element
import Tag
18 from xdg_base_dirs
import xdg_cache_home
21 def command_line_parser() -> ArgumentParser
:
22 parser
= ArgumentParser(prog
='paperdoorknob', description
='Print glowfic')
26 help='Where to keep the http cache (instead of %(default)s)',
27 default
=os
.path
.join(xdg_cache_home(), "paperdoorknob"))
30 help='The filename stem at which to write output ' +
31 '(eg: "%(default)s" produces %(default)s.text, %(default)s.pdf, etc.)',
33 parser
.add_argument('--pandoc', help='Location of the pandoc executable')
36 help='How long to wait for HTTP requests, in seconds',
38 parser
.add_argument('url', help='URL to retrieve')
def fetch(url: str, session: requests.Session, timeout: int) -> BeautifulSoup:
    """Download *url* through *session* and parse the response body as HTML.

    Args:
        url: Address of the page to retrieve.
        session: Session used to issue the GET request.
        timeout: Forwarded to ``session.get`` as its ``timeout`` argument.

    Returns:
        The response text parsed with BeautifulSoup's ``'html.parser'``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    with session.get(url, timeout=timeout) as r:
        # Fail loudly on HTTP errors rather than parsing an error page as
        # though it were the requested content.
        r.raise_for_status()
        return BeautifulSoup(r.text, 'html.parser')
48 def clean(html
: BeautifulSoup
) -> BeautifulSoup
:
49 for eb
in html
.find_all("div", class_
="post-edit-box"):
51 for footer
in html
.find_all("div", class_
="post-footer"):
56 def replies(html
: BeautifulSoup
) -> Iterable
[Tag
]:
60 text
= body
.find_next("div", class_
="post-post")
61 assert isinstance(text
, Tag
)
64 def the_replies() -> Iterable
[Tag
]:
65 rs
= html
.find_all("div", class_
="post-reply")
66 assert all(isinstance(r
, Tag
) for r
in rs
)
69 return itertools
.chain([text()], the_replies())
74 session
: requests
.Session
,
78 html
= clean(fetch(url
, session
, timeout
))
80 print("soon", file=texout
)
84 args
= command_line_parser().parse_args()
85 with requests_cache
.CachedSession(args
.cache_path
, cache_control
=True) as session
:
86 with open(args
.out
+ '.tex', 'w', encoding
='UTF-8') as texout
:
87 process(args
.url
, session
, args
.timeout
, texout
)
90 if __name__
== '__main__':