git.scottworley.com Git - paperdoorknob/commitdiff
Parse HTML
author Scott Worley <scottworley@scottworley.com>
Fri, 24 Nov 2023 02:34:52 +0000 (18:34 -0800)
committer Scott Worley <scottworley@scottworley.com>
Wed, 20 Dec 2023 01:37:10 +0000 (17:37 -0800)
default.nix
paperdoorknob.py
paperdoorknob_test.py

index 69a71496916a7533de4260f0c2beaa9ec1915db2..bf96eb8c437f74b47d65a4b9a09ed4d9d258e55d 100644 (file)
@@ -1,12 +1,14 @@
 { pkgs ? import <nixpkgs> { }, lint ? false }:
 pkgs.python3Packages.callPackage ({ lib, buildPythonPackage, autopep8, mypy
-  , pylint, requests, requests-cache, types-requests, xdg-base-dirs }:
+  , pylint, beautifulsoup4, requests, requests-cache, types-beautifulsoup4
+  , types-requests, xdg-base-dirs }:
   buildPythonPackage rec {
     pname = "paperdoorknob";
     version = "0.0.1";
     src = lib.cleanSource ./.;
-    propagatedBuildInputs = [ requests requests-cache xdg-base-dirs ];
-    nativeCheckInputs = [ mypy types-requests ]
+    propagatedBuildInputs =
+      [ beautifulsoup4 requests requests-cache xdg-base-dirs ];
+    nativeCheckInputs = [ mypy types-beautifulsoup4 types-requests ]
       ++ lib.optionals lint [ autopep8 pylint ];
     doCheck = true;
     checkPhase = "./test.sh";
index b7e4349b98ab1d97ae2ff0ad6f50e4740bb6440f..bb8bdd1ae38a660b3a6f90aa3d5aa317505beee3 100644 (file)
@@ -7,6 +7,7 @@
 
 from argparse import ArgumentParser
 import os.path
+from bs4 import BeautifulSoup
 import requests
 import requests_cache
 from xdg_base_dirs import xdg_cache_home
@@ -27,9 +28,10 @@ def command_line_parser() -> ArgumentParser:
     return parser
 
 
-def fetch(url: str, session: requests.Session, timeout: int) -> None:
+def fetch(url: str, session: requests.Session, timeout: int) -> BeautifulSoup:
     with session.get(url, timeout=timeout) as r:
         r.raise_for_status()
+        return BeautifulSoup(r.text, 'html.parser')
 
 
 def main() -> None:
index cb70ac56dd214d26988a86e915aea762cb74008b..ab13eedf9730acccb9ec2b0f8ec64fc9c02ce016 100644 (file)
@@ -81,6 +81,14 @@ class TestFetch(unittest.TestCase):
             paperdoorknob.fetch(f"http://localhost:{self._port()}", s, TIMEOUT)
             self.assertEqual(self._request_counter, 1)
 
+    def testFetchConents(self) -> None:
+        with requests.session() as s:
+            doc = paperdoorknob.fetch(
+                f"http://localhost:{self._port()}", s, TIMEOUT)
+            body = doc.body
+            assert body
+            self.assertEqual(body.text, "This is glowfic")
+
     def testFetchErrors(self) -> None:
         with requests.session() as s:
             with self.assertRaises(requests.HTTPError):