git.scottworley.com Git - paperdoorknob/blame - fetch.py
FakeFetcher: Show bad URLs in error messages
[paperdoorknob] / fetch.py
CommitLineData
705973e7
SW
1# paperdoorknob: Print glowfic
2#
3# This program is free software: you can redistribute it and/or modify it
4# under the terms of the GNU General Public License as published by the
5# Free Software Foundation, version 3.
6
7
8from abc import ABC, abstractmethod
9from contextlib import contextmanager
75c5665e
SW
10from sys import stderr
11from typing import IO, Iterator
705973e7
SW
12
13import requests
14import requests_cache
15
16
class Fetcher(ABC):
    """Abstract interface for anything that can turn a URL into bytes."""

    @abstractmethod
    def fetch(self, url: str) -> bytes:
        """Return the content located at ``url`` as bytes.

        Subclasses must override this method; the base implementation
        does nothing except raise NotImplementedError.
        """
        raise NotImplementedError
21
22
class _SessionFetcher(Fetcher):
    """Fetcher that issues GET requests through a caller-supplied session."""

    def __init__(self, session: requests.Session, timeout: int) -> None:
        # The session is only stored here; nothing in this class closes it.
        self._timeout = timeout
        self._session = session

    def fetch(self, url: str) -> bytes:
        """GET ``url`` via the stored session and return the response body.

        The stored timeout is forwarded as the ``timeout`` argument of the
        request.  ``raise_for_status()`` is called before the body is read,
        so whatever it raises propagates to the caller.
        """
        with self._session.get(url, timeout=self._timeout) as response:
            response.raise_for_status()
            body = response.content
        return body
33
34
75c5665e
SW
class _CachingFetcher(Fetcher):
    """Fetcher on top of a requests_cache session that tallies cache hits."""

    def __init__(
            self,
            session: requests_cache.CachedSession,
            timeout: int) -> None:
        self._session = session
        self._timeout = timeout
        # Tallies maintained by fetch() and exposed through the read-only
        # properties below.
        self._request_count = 0
        self._cache_hit_count = 0

    def fetch(self, url: str) -> bytes:
        """GET ``url`` and return the body, recording whether it was cached.

        ``raise_for_status()`` runs before either counter is touched, so a
        response for which it raises is not included in the tallies.
        """
        with self._session.get(url, timeout=self._timeout) as response:
            response.raise_for_status()
            self._request_count += 1
            # from_cache is converted with int() so a hit adds 1, a miss 0.
            self._cache_hit_count += int(response.from_cache)
            body = response.content
        return body

    @property
    def request_count(self) -> int:
        """Number of fetch() calls that got past the status check."""
        return self._request_count

    @property
    def cache_hit_count(self) -> int:
        """Sum of the ``from_cache`` flags of the counted responses."""
        return self._cache_hit_count
60
61
705973e7
SW
@contextmanager
def DirectFetcher(timeout: int) -> Iterator[_SessionFetcher]:
    """Context manager yielding a fetcher bound to a fresh requests session.

    Args:
        timeout: Forwarded to the fetcher, which passes it as the
            ``timeout`` argument of every request it makes.

    Yields:
        A ``_SessionFetcher`` wrapping the session.  The session is used as
        a context manager itself, so it is released when this context exits.
    """
    # Instantiate Session directly: the lowercase requests.session() helper
    # is documented as deprecated and merely returns Session().
    with requests.Session() as session:
        yield _SessionFetcher(session, timeout)
66
67
@contextmanager
def CachingFetcher(
        cache_path: str,
        timeout: int,
        report_stream: IO[str] = stderr) -> Iterator[_CachingFetcher]:
    """Context manager yielding a cache-backed fetcher, then reporting hits.

    Args:
        cache_path: First argument given to ``requests_cache.CachedSession``.
        timeout: Forwarded to the fetcher for use on each request.
        report_stream: Text stream that receives the one-line hit report;
            defaults to the ``stderr`` object imported by this module.

    Yields:
        A ``_CachingFetcher`` wrapping the cached session.

    After the caller's block completes normally, a summary line is printed
    to ``report_stream`` provided at least one request was counted.  If the
    caller's block raises, the exception propagates from the ``yield`` and
    no report is printed.
    """
    with requests_cache.CachedSession(cache_path, cache_control=True) as session:
        fetcher = _CachingFetcher(session, timeout)
        yield fetcher
        if not fetcher.request_count > 0:
            # Nothing was counted: skip the report and avoid dividing by zero.
            return
        percent = 100.0 * fetcher.cache_hit_count / fetcher.request_count
        print(
            f"Fetch cache hits: {fetcher.cache_hit_count} ({percent:.1f}%)",
            file=report_stream)
38621839
SW
81
82
class FakeFetcher(Fetcher):
    """Fetcher that serves bodies from an in-memory dict instead of a network."""

    def __init__(self, resources: dict[str, bytes]) -> None:
        # The dict is stored by reference, not copied.
        self._fetch_count = 0
        self._resources = resources

    def fetch(self, url: str) -> bytes:
        """Return the bytes registered for ``url``.

        Every call is counted, including calls for unknown URLs.

        Raises:
            requests.HTTPError: ``url`` is not a key of the resources dict;
                the URL is passed as the second exception argument.
        """
        self._fetch_count += 1
        if url in self._resources:
            return self._resources[url]
        raise requests.HTTPError("URL not found", url)

    def request_count(self) -> int:
        """Return how many times fetch() has been called.

        This is a regular method, so it must be called with parentheses.
        """
        return self._fetch_count