git.scottworley.com Git - paperdoorknob/commitdiff
Report fetch cache hit rate
author: Scott Worley <scottworley@scottworley.com>
Thu, 21 Dec 2023 05:21:53 +0000 (21:21 -0800)
committer: Scott Worley <scottworley@scottworley.com>
Thu, 21 Dec 2023 05:21:53 +0000 (21:21 -0800)
fetch.py
fetch_test.py

index d5267534a100e1d0e527ef347547cb57b2ae6f31..1d2070183a44dded893850630f68d6c2b9c06bb9 100644 (file)
--- a/fetch.py
+++ b/fetch.py
@@ -7,7 +7,8 @@
 
 from abc import ABC, abstractmethod
 from contextlib import contextmanager
-from typing import Iterator
+from sys import stderr
+from typing import IO, Iterator
 
 import requests
 import requests_cache
@@ -31,6 +32,33 @@ class _SessionFetcher(Fetcher):
             return r.content
 
 
+class _CachingFetcher(Fetcher):
+
+    def __init__(
+            self,
+            session: requests_cache.CachedSession,
+            timeout: int) -> None:
+        self._session = session
+        self._timeout = timeout
+        self._request_count = 0
+        self._cache_hit_count = 0
+
+    def fetch(self, url: str) -> bytes:
+        with self._session.get(url, timeout=self._timeout) as r:
+            r.raise_for_status()
+            self._request_count += 1
+            self._cache_hit_count += int(r.from_cache)
+            return r.content
+
+    @property
+    def request_count(self) -> int:
+        return self._request_count
+
+    @property
+    def cache_hit_count(self) -> int:
+        return self._cache_hit_count
+
+
 @contextmanager
 def DirectFetcher(timeout: int) -> Iterator[_SessionFetcher]:
     with requests.session() as session:
@@ -38,9 +66,18 @@ def DirectFetcher(timeout: int) -> Iterator[_SessionFetcher]:
 
 
 @contextmanager
-def CachingFetcher(cache_path: str, timeout: int) -> Iterator[_SessionFetcher]:
+def CachingFetcher(
+        cache_path: str,
+        timeout: int,
+        report_stream: IO[str] = stderr) -> Iterator[_CachingFetcher]:
     with requests_cache.CachedSession(cache_path, cache_control=True) as session:
-        yield _SessionFetcher(session, timeout)
+        fetcher = _CachingFetcher(session, timeout)
+        yield fetcher
+        if fetcher.request_count > 0:
+            percent = 100.0 * fetcher.cache_hit_count / fetcher.request_count
+            print(
+                f"Fetch cache hits: {fetcher.cache_hit_count} ({percent:.1f}%)",
+                file=report_stream)
 
 
 class FakeFetcher(Fetcher):
index 6bdf69ecc2a39e93f1903d15ce8a5fd8eea1ddf3..5f21feee1c2d293e198d12a5851c38a1e52524ab 100644 (file)
--- a/fetch_test.py
+++ b/fetch_test.py
@@ -5,8 +5,11 @@
 # Free Software Foundation, version 3.
 
 
+from io import StringIO
 import unittest
+
 from requests import HTTPError
+
 from testing.fakeserver import FakeGlowficServer
 from fetch import CachingFetcher, DirectFetcher
 
@@ -40,6 +43,13 @@ class TestFetch(unittest.TestCase):
             f.fetch(f"http://localhost:{self._port}")
             self.assertEqual(self._server.request_count(), 1)
 
+    def testCacheHitRateReport(self) -> None:
+        buf = StringIO()
+        with CachingFetcher("testcachehitratereportwithcl", TIMEOUT, buf) as f:
+            for _ in range(7):
+                f.fetch(f"http://localhost:{self._port}")
+        self.assertEqual("Fetch cache hits: 6 (85.7%)\n", buf.getvalue())
+
     def testFetchErrors(self) -> None:
         with DirectFetcher(TIMEOUT) as f:
             with self.assertRaises(HTTPError):