From: Scott Worley Date: Fri, 24 Nov 2023 03:26:53 +0000 (-0800) Subject: Begin parsing glowfic html X-Git-Url: http://git.scottworley.com/paperdoorknob/commitdiff_plain/6409066b6abbe205fa64f844a01516f3f3de2553?ds=sidebyside Begin parsing glowfic html --- diff --git a/paperdoorknob.py b/paperdoorknob.py index bb8bdd1..b88c02d 100644 --- a/paperdoorknob.py +++ b/paperdoorknob.py @@ -8,11 +8,24 @@ from argparse import ArgumentParser import os.path from bs4 import BeautifulSoup +from bs4.element import Tag import requests import requests_cache from xdg_base_dirs import xdg_cache_home +class Post: + def __init__(self, html: BeautifulSoup) -> None: + self._html = html + + def text(self) -> Tag: + body = self._html.body + assert body + text = body.find_next("div", class_="post-post") + assert isinstance(text, Tag) + return text + + def command_line_parser() -> ArgumentParser: parser = ArgumentParser(prog='paperdoorknob', description='Print glowfic') parser.add_argument( @@ -37,7 +50,8 @@ def fetch(url: str, session: requests.Session, timeout: int) -> BeautifulSoup: def main() -> None: args = command_line_parser().parse_args() with requests_cache.CachedSession(args.cache_path, cache_control=True) as session: - fetch(args.url, session, args.timeout) + html = fetch(args.url, session, args.timeout) + Post(html) if __name__ == '__main__': diff --git a/paperdoorknob_test.py b/paperdoorknob_test.py index ab13eed..83cf76a 100644 --- a/paperdoorknob_test.py +++ b/paperdoorknob_test.py @@ -28,7 +28,13 @@ class FakeGlowficHTTPRequestHandler(BaseHTTPRequestHandler): return 200 def do_GET(self) -> None: - body = b'This is glowfic' + body = b''' + +
+ This is glowfic +
+ +''' self.send_response(self._response_code()) self.send_header("Content-type", "text/html") self.send_header("Content-Length", str(len(body))) @@ -83,11 +89,9 @@ class TestFetch(unittest.TestCase): def testFetchConents(self) -> None: with requests.session() as s: - doc = paperdoorknob.fetch( - f"http://localhost:{self._port()}", s, TIMEOUT) - body = doc.body - assert body - self.assertEqual(body.text, "This is glowfic") + post = paperdoorknob.Post(paperdoorknob.fetch( + f"http://localhost:{self._port()}", s, TIMEOUT)) + self.assertEqual(post.text().text.strip(), "This is glowfic") def testFetchErrors(self) -> None: with requests.session() as s: