git.scottworley.com Git - paperdoorknob/blobdiff - fetch.py
next_thread should be an absolute URL
[paperdoorknob] / fetch.py
index 1d2070183a44dded893850630f68d6c2b9c06bb9..eb904a4f4b0e525756c977d442fb0e4144dc0825 100644 (file)
--- a/fetch.py
+++ b/fetch.py
@@ -8,11 +8,17 @@
 from abc import ABC, abstractmethod
 from contextlib import contextmanager
 from sys import stderr
-from typing import IO, Iterator
+from typing import Callable, Iterator
 
 import requests
 import requests_cache
 
+from version import paperdoorknob_version
+
+
+_headers = {'User-Agent': f'paperdoorknob/{paperdoorknob_version} ' +
+            '(https://git.scottworley.com/paperdoorknob/)'}
+
 
 class Fetcher(ABC):
     @abstractmethod
@@ -27,7 +33,7 @@ class _SessionFetcher(Fetcher):
         self._timeout = timeout
 
     def fetch(self, url: str) -> bytes:
-        with self._session.get(url, timeout=self._timeout) as r:
+        with self._session.get(url, timeout=self._timeout, headers=_headers) as r:
             r.raise_for_status()
             return r.content
 
@@ -44,7 +50,7 @@ class _CachingFetcher(Fetcher):
         self._cache_hit_count = 0
 
     def fetch(self, url: str) -> bytes:
-        with self._session.get(url, timeout=self._timeout) as r:
+        with self._session.get(url, timeout=self._timeout, headers=_headers) as r:
             r.raise_for_status()
             self._request_count += 1
             self._cache_hit_count += int(r.from_cache)
@@ -67,17 +73,16 @@ def DirectFetcher(timeout: int) -> Iterator[_SessionFetcher]:
 
 @contextmanager
 def CachingFetcher(
-        cache_path: str,
-        timeout: int,
-        report_stream: IO[str] = stderr) -> Iterator[_CachingFetcher]:
+    cache_path: str,
+    timeout: int,
+    log: Callable[[str], None] = lambda x: print(x, file=stderr),
+) -> Iterator[_CachingFetcher]:
     with requests_cache.CachedSession(cache_path, cache_control=True) as session:
         fetcher = _CachingFetcher(session, timeout)
         yield fetcher
         if fetcher.request_count > 0:
             percent = 100.0 * fetcher.cache_hit_count / fetcher.request_count
-            print(
-                f"Fetch cache hits: {fetcher.cache_hit_count} ({percent:.1f}%)",
-                file=report_stream)
+            log(f"Fetch cache hits: {fetcher.cache_hit_count} ({percent:.1f}%)")
 
 
 class FakeFetcher(Fetcher):
@@ -89,7 +94,7 @@ class FakeFetcher(Fetcher):
     def fetch(self, url: str) -> bytes:
         self._fetch_count += 1
         if url not in self._resources:
-            raise requests.HTTPError("URL not found")
+            raise requests.HTTPError("URL not found", url)
         return self._resources[url]
 
     def request_count(self) -> int: