git.scottworley.com Git - paperdoorknob/blame - paperdoorknob.py
Strip edit-boxes and footers
[paperdoorknob] / paperdoorknob.py
CommitLineData
92b11a10
SW
1# paperdoorknob: Print glowfic
2#
3# This program is free software: you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation, version 3.
6
7
8from argparse import ArgumentParser
55958ec0 9import itertools
ba3b7c52 10import os.path
a0d30541
SW
11
12from typing import Iterable
13
136277e3 14from bs4 import BeautifulSoup
6409066b 15from bs4.element import Tag
b25a2f90 16import requests
b34a368f 17import requests_cache
ba3b7c52 18from xdg_base_dirs import xdg_cache_home
92b11a10
SW
19
20
6409066b
SW
class Post:
    """A parsed glowfic page with edit boxes and footers removed.

    The constructor keeps a reference to the parsed document it is given
    and strips the unwanted elements from that same document in place.
    """

    def __init__(self, html: BeautifulSoup) -> None:
        """Store *html* and remove its edit-box and footer divs."""
        self._html = html
        # Edit boxes are removed first, then footers.
        for css_class in ("post-edit-box", "post-footer"):
            for unwanted in self._html.find_all("div", class_=css_class):
                unwanted.decompose()

    def text(self) -> Tag:
        """Return the first ``div.post-post`` found after ``<body>``.

        Raises AssertionError when the document has no body or no such
        div is found.
        """
        body = self._html.body
        assert body
        opening_post = body.find_next("div", class_="post-post")
        assert isinstance(opening_post, Tag)
        return opening_post

    def replies(self) -> Iterable[Tag]:
        """Return every ``div.post-reply`` element in the document."""
        found = self._html.find_all("div", class_="post-reply")
        assert all(isinstance(reply, Tag) for reply in found)
        return found

    def entries(self) -> Iterable[Tag]:
        """Return the opening post followed by all of its replies."""
        opening_post = self.text()
        return itertools.chain([opening_post], self.replies())
43
6409066b 44
92b11a10
SW
def command_line_parser() -> ArgumentParser:
    """Build the argument parser for the paperdoorknob command line.

    Options:
        --cache_path PATH  Where to keep the http cache; defaults to a
                           "paperdoorknob" directory under the XDG cache
                           home.
        --timeout          HTTP timeout in seconds, parsed as an int;
                           defaults to 30.
        url                The URL to retrieve (positional, required).

    Returns:
        The configured ArgumentParser.
    """
    parser = ArgumentParser(prog='paperdoorknob', description='Print glowfic')
    parser.add_argument(
        '--cache_path',
        metavar='PATH',
        help='Where to keep the http cache (instead of %(default)s)',
        default=os.path.join(xdg_cache_home(), "paperdoorknob"))
    parser.add_argument(
        '--timeout',
        help='How long to wait for HTTP requests, in seconds',
        # Without an explicit type, a value given on the command line would
        # stay a str while the default is an int; convert so callers always
        # receive a number.
        type=int,
        default=30)
    parser.add_argument('url', help='URL to retrieve')
    return parser
58
59
def fetch(url: str, session: requests.Session, timeout: int) -> BeautifulSoup:
    """Download *url* through *session* and parse the body as HTML.

    Args:
        url: Address of the page to retrieve.
        session: Session used to issue the GET request.
        timeout: Passed to the request as its timeout.

    Returns:
        The response text parsed with the 'html.parser' backend.

    Raises:
        Whatever ``raise_for_status()`` raises for an error response.
    """
    with session.get(url, timeout=timeout) as response:
        response.raise_for_status()
        markup = response.text
    return BeautifulSoup(markup, 'html.parser')
b25a2f90
SW
64
65
def main() -> None:
    """Parse the command line, fetch the requested URL and wrap it in a Post.

    The request goes through a requests_cache session stored at the
    configured cache path. The resulting Post is not used further here.
    """
    options = command_line_parser().parse_args()
    with requests_cache.CachedSession(
            options.cache_path, cache_control=True) as http:
        page = fetch(options.url, http, options.timeout)
        Post(page)
92b11a10
SW
71
72
# Run the program only when executed as a script, not when imported.
if __name__ == '__main__':
    main()