]>
git.scottworley.com Git - paperdoorknob/blob - fetch.py
# paperdoorknob: Print glowfic
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, version 3.
8 from abc
import ABC
, abstractmethod
9 from contextlib
import contextmanager
10 from sys
import stderr
11 from typing
import IO
, Iterator
18 'User-Agent': 'paperdoorknob/0.0.1 (https://git.scottworley.com/paperdoorknob/)'}
class Fetcher(ABC):
    """Abstract interface for turning a URL into the bytes it names."""

    @abstractmethod
    def fetch(self, url: str) -> bytes:
        """Return the content found at *url*.

        Concrete fetchers must override this method; the base
        implementation exists only to define the signature.

        Args:
            url: Address of the resource to retrieve.

        Returns:
            The raw body of the resource.

        Raises:
            NotImplementedError: Always, when the base implementation is
                invoked directly.
        """
        raise NotImplementedError()
class _SessionFetcher(Fetcher):
    """Fetcher that sends every request through a single requests session."""

    def __init__(self, session: requests.Session, timeout: int) -> None:
        """Remember the session and timeout used for all later fetches.

        Args:
            session: Session object whose ``get`` performs each request.
            timeout: Value passed as ``timeout=`` on every ``get`` call.
        """
        self._session = session
        self._timeout = timeout

    def fetch(self, url: str) -> bytes:
        """GET *url* with the module's ``_headers`` and return the body.

        Args:
            url: Address to request.

        Returns:
            The response body as bytes.

        Raises:
            requests.HTTPError: If the response carries an error status.
        """
        with self._session.get(url, timeout=self._timeout, headers=_headers) as r:
            # Report HTTP failures as exceptions (FakeFetcher likewise raises
            # requests.HTTPError for unknown URLs) rather than handing an
            # error page back to the caller as if it were content.
            r.raise_for_status()
            return r.content
class _CachingFetcher(Fetcher):
    """Fetcher over a requests_cache session that tallies cache hits."""

    def __init__(
            self,
            session: requests_cache.CachedSession,
            timeout: int) -> None:
        """Remember the cached session and timeout; zero both counters.

        Args:
            session: Cached session whose ``get`` performs each request.
            timeout: Value passed as ``timeout=`` on every ``get`` call.
        """
        self._session = session
        self._timeout = timeout
        self._request_count = 0
        self._cache_hit_count = 0

    def fetch(self, url: str) -> bytes:
        """GET *url* with the module's ``_headers`` and return the body.

        Only responses that pass the status check are counted, so the
        hit statistics describe successful fetches.

        Args:
            url: Address to request.

        Returns:
            The response body as bytes.

        Raises:
            requests.HTTPError: If the response carries an error status.
        """
        with self._session.get(url, timeout=self._timeout, headers=_headers) as r:
            r.raise_for_status()
            self._request_count += 1
            # from_cache is a bool; int() turns it into a 0/1 increment.
            self._cache_hit_count += int(r.from_cache)
            return r.content

    # Both counters are exposed as properties because the cache-hit report
    # reads them as plain attributes (``fetcher.request_count > 0``).
    @property
    def request_count(self) -> int:
        """Number of successful fetches performed so far."""
        return self._request_count

    @property
    def cache_hit_count(self) -> int:
        """Number of successful fetches that were served from the cache."""
        return self._cache_hit_count
@contextmanager
def DirectFetcher(timeout: int) -> Iterator[_SessionFetcher]:
    """Provide a fetcher backed by a fresh, uncached requests session.

    Intended for use in a ``with`` statement: the session is opened on
    entry and closed again when the block exits.

    Args:
        timeout: Timeout handed to the fetcher for every request.

    Yields:
        A ``_SessionFetcher`` bound to the open session.
    """
    with requests.session() as session:
        yield _SessionFetcher(session, timeout)
76 report_stream
: IO
[str] = stderr
) -> Iterator
[_CachingFetcher
]:
77 with requests_cache
.CachedSession(cache_path
, cache_control
=True) as session
:
78 fetcher
= _CachingFetcher(session
, timeout
)
80 if fetcher
.request_count
> 0:
81 percent
= 100.0 * fetcher
.cache_hit_count
/ fetcher
.request_count
83 f
"Fetch cache hits: {fetcher.cache_hit_count} ({percent:.1f}%)",
class FakeFetcher(Fetcher):
    """Fetcher that answers from an in-memory dict instead of the network."""

    def __init__(self, resources: dict[str, bytes]) -> None:
        """Store the canned responses and reset the fetch counter.

        Args:
            resources: Mapping from URL to the bytes ``fetch`` returns for it.
        """
        self._resources = resources
        # fetch() increments this with ``+=``, so it must exist beforehand;
        # otherwise the very first fetch fails with AttributeError.
        self._fetch_count = 0

    def fetch(self, url: str) -> bytes:
        """Return the canned bytes registered for *url*.

        Every call is counted, including calls for unknown URLs.

        Args:
            url: Key to look up in the resources mapping.

        Returns:
            The bytes stored under *url*.

        Raises:
            requests.HTTPError: If *url* is not in the resources mapping.
        """
        self._fetch_count += 1
        if url not in self._resources:
            raise requests.HTTPError("URL not found", url)
        return self._resources[url]

    def request_count(self) -> int:
        """Return how many times ``fetch`` has been called."""
        return self._fetch_count