X-Git-Url: http://git.scottworley.com/paperdoorknob/blobdiff_plain/75c5665e089279f9de1a38b31e03f9736ecdc17e..refs/heads/main:/fetch.py?ds=sidebyside diff --git a/fetch.py b/fetch.py index 1d20701..eb904a4 100644 --- a/fetch.py +++ b/fetch.py @@ -8,11 +8,17 @@ from abc import ABC, abstractmethod from contextlib import contextmanager from sys import stderr -from typing import IO, Iterator +from typing import Callable, Iterator import requests import requests_cache +from version import paperdoorknob_version + + +_headers = {'User-Agent': f'paperdoorknob/{paperdoorknob_version} ' + + '(https://git.scottworley.com/paperdoorknob/)'} + class Fetcher(ABC): @abstractmethod @@ -27,7 +33,7 @@ class _SessionFetcher(Fetcher): self._timeout = timeout def fetch(self, url: str) -> bytes: - with self._session.get(url, timeout=self._timeout) as r: + with self._session.get(url, timeout=self._timeout, headers=_headers) as r: r.raise_for_status() return r.content @@ -44,7 +50,7 @@ class _CachingFetcher(Fetcher): self._cache_hit_count = 0 def fetch(self, url: str) -> bytes: - with self._session.get(url, timeout=self._timeout) as r: + with self._session.get(url, timeout=self._timeout, headers=_headers) as r: r.raise_for_status() self._request_count += 1 self._cache_hit_count += int(r.from_cache) @@ -67,17 +73,16 @@ def DirectFetcher(timeout: int) -> Iterator[_SessionFetcher]: @contextmanager def CachingFetcher( - cache_path: str, - timeout: int, - report_stream: IO[str] = stderr) -> Iterator[_CachingFetcher]: + cache_path: str, + timeout: int, + log: Callable[[str], None] = lambda x: print(x, file=stderr), +) -> Iterator[_CachingFetcher]: with requests_cache.CachedSession(cache_path, cache_control=True) as session: fetcher = _CachingFetcher(session, timeout) yield fetcher if fetcher.request_count > 0: percent = 100.0 * fetcher.cache_hit_count / fetcher.request_count - print( - f"Fetch cache hits: {fetcher.cache_hit_count} ({percent:.1f}%)", - file=report_stream) + log(f"Fetch cache hits: {fetcher.cache_hit_count} ({percent:.1f}%)") class FakeFetcher(Fetcher): @@ -89,7 +94,7 @@ class FakeFetcher(Fetcher): def fetch(self, url: str) -> bytes: self._fetch_count += 1 if url not in self._resources: - raise requests.HTTPError("URL not found") + raise requests.HTTPError("URL not found", url) return self._resources[url] def request_count(self) -> int: