from abc import ABC, abstractmethod
from contextlib import contextmanager
-from typing import Iterator
+from sys import stderr
+from typing import Callable, Iterator
import requests
import requests_cache
+from version import paperdoorknob_version
+
+
# Headers passed along with the session fetchers' GET requests; the
# User-Agent names this client, its version, and the project URL.
_headers = {
    'User-Agent': (
        f'paperdoorknob/{paperdoorknob_version} '
        '(https://git.scottworley.com/paperdoorknob/)'
    ),
}
+
class Fetcher(ABC):
@abstractmethod
self._timeout = timeout
def fetch(self, url: str) -> bytes:
    """Fetch ``url`` through the stored session and return the response body.

    The request is made with this instance's timeout and the module-level
    ``_headers``.  Any exception raised by ``raise_for_status()`` for the
    response propagates to the caller.
    """
    with self._session.get(url, timeout=self._timeout, headers=_headers) as response:
        response.raise_for_status()
        body = response.content
    return body
+
+
class _CachingFetcher(Fetcher):
    """Fetcher over a ``requests_cache.CachedSession`` that keeps usage tallies.

    It counts the fetches that passed ``raise_for_status()`` and how many of
    those the response flagged as ``from_cache``.
    """

    def __init__(
            self,
            session: requests_cache.CachedSession,
            timeout: int) -> None:
        """Remember the session and request timeout; start both tallies at zero."""
        self._request_count = 0
        self._cache_hit_count = 0
        self._session = session
        self._timeout = timeout

    def fetch(self, url: str) -> bytes:
        """Return the body of ``url`` and update the request/cache-hit tallies.

        The tallies are only touched after ``raise_for_status()`` returns, so
        a response that raises there is counted in neither of them.
        """
        with self._session.get(url, timeout=self._timeout, headers=_headers) as response:
            response.raise_for_status()
            self._request_count += 1
            # from_cache is converted with int(), so a hit adds 1 and a miss adds 0.
            self._cache_hit_count += int(response.from_cache)
            body = response.content
        return body

    @property
    def request_count(self) -> int:
        """Number of fetches counted so far."""
        return self._request_count

    @property
    def cache_hit_count(self) -> int:
        """Number of counted fetches whose response was marked ``from_cache``."""
        return self._cache_hit_count
+
@contextmanager
def DirectFetcher(timeout: int) -> Iterator[_SessionFetcher]:
@contextmanager
def CachingFetcher(
    cache_path: str,
    timeout: int,
    log: Callable[[str], None] = lambda x: print(x, file=stderr),
) -> Iterator[_CachingFetcher]:
    """Context manager that yields a fetcher backed by a cached session.

    Args:
        cache_path: First argument given to ``requests_cache.CachedSession``.
        timeout: Timeout handed to the yielded fetcher for its requests.
        log: Callable that receives the one-line cache-hit summary; the
            default prints it to ``stderr``.

    Yields:
        The ``_CachingFetcher`` wrapping the open session.

    After the ``with`` body finishes normally, a summary of cache hits is
    sent to ``log`` if at least one request was counted.  If the body raises,
    the exception surfaces at the ``yield`` and no summary is logged.
    """
    with requests_cache.CachedSession(cache_path, cache_control=True) as session:
        fetcher = _CachingFetcher(session, timeout)
        yield fetcher
        total = fetcher.request_count
        if total <= 0:
            # Nothing was fetched: skip the summary (and the division below).
            return
        percent = 100.0 * fetcher.cache_hit_count / total
        log(f"Fetch cache hits: {fetcher.cache_hit_count} ({percent:.1f}%)")
class FakeFetcher(Fetcher):
def fetch(self, url: str) -> bytes:
    """Return the bytes stored for ``url`` in ``self._resources``.

    Every call increments the fetch counter, including calls for URLs that
    are not present.

    Raises:
        requests.HTTPError: if ``url`` is not a key of ``self._resources``.
    """
    self._fetch_count += 1
    if url in self._resources:
        return self._resources[url]
    raise requests.HTTPError("URL not found", url)
def request_count(self) -> int: