From: Scott Worley Date: Wed, 20 Dec 2023 07:39:34 +0000 (-0800) Subject: ImageStore X-Git-Url: http://git.scottworley.com/paperdoorknob/commitdiff_plain/91fe9916122adaee2cf1695040f906d709e1aa1c?ds=inline;hp=e10b5b6f112c057ab33ad46f8a3385d3bcd23e1d ImageStore --- diff --git a/fetch.py b/fetch.py index 7776f93..d526753 100644 --- a/fetch.py +++ b/fetch.py @@ -47,8 +47,13 @@ class FakeFetcher(Fetcher): def __init__(self, resources: dict[str, bytes]) -> None: self._resources = resources + self._fetch_count = 0 def fetch(self, url: str) -> bytes: + self._fetch_count += 1 if url not in self._resources: raise requests.HTTPError("URL not found") return self._resources[url] + + def request_count(self) -> int: + return self._fetch_count diff --git a/images.py b/images.py new file mode 100644 index 0000000..178f708 --- /dev/null +++ b/images.py @@ -0,0 +1,48 @@ +# paperdoorknob: Print glowfic +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, version 3. 
import os
import os.path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Fetcher is only referenced in annotations, so it is not needed at
    # runtime.
    from fetch import Fetcher


class ImageStoreError(Exception):
    """Raised when ImageStore cannot choose an unused filename for an image."""


class ImageStore:
    """Fetch images by URL and keep each one as a file under one directory.

    Every distinct URL is fetched at most once per store; later requests for
    the same URL return the path recorded the first time.  Files are named
    after the last component of the URL, with a numeric suffix inserted
    before the extension when that name was already used by this store.
    """

    # Used when the URL has no final component (for example it ends in '/'),
    # where os.path.basename() returns ''.  Without a fallback the store would
    # try to open its own root directory for writing.
    _FALLBACK_BASENAME = 'image'

    def __init__(self, root_path: str, fetcher: "Fetcher") -> None:
        """Create a store that writes under root_path using fetcher.

        root_path is created lazily, the first time an image is written.
        """
        self._root_path = root_path
        self._fetcher = fetcher
        # URL -> path of the file that URL's data was written to.
        self._images: dict[str, str] = {}
        # Filenames already handed out by this store (collision detection).
        self._filenames: set[str] = set()

    def _filename(self, url: str) -> str:
        """Return a filename for url that this store has not used yet.

        Raises:
            ImageStoreError: if the plain name and all 10000 numbered
                variants of it are already taken.
        """
        assert url not in self._images
        base = os.path.basename(url) or self._FALLBACK_BASENAME
        if base not in self._filenames:
            return base
        stem, ext = os.path.splitext(base)
        for i in range(10000):
            name = f"{stem}-{i:04d}{ext}"
            if name not in self._filenames:
                return name
        raise ImageStoreError(
            'Unexpectedly-many similarly-named images fetched?')

    def get_image(self, url: str) -> str:
        """Return the path of the local file holding the image at url.

        The first request for a URL fetches it and writes the data to a new
        file under the store's root directory; repeated requests return the
        recorded path without fetching again.  Errors from the fetcher or
        from writing the file are not caught here.
        """
        if url in self._images:
            return self._images[url]

        # Fetch before choosing a name so that a failed fetch reserves nothing.
        image_data = self._fetcher.fetch(url)
        name = self._filename(url)
        path = os.path.join(self._root_path, name)
        os.makedirs(self._root_path, exist_ok=True)
        with open(path, "wb") as f:
            f.write(image_data)
        # Record the name and path only once the file has been written.
        self._filenames.add(name)
        self._images[url] = path
        return path
import os.path
import tempfile
import unittest

from fetch import FakeFetcher
from images import ImageStore


class TestImageStore(unittest.TestCase):
    """Check that ImageStore fetches each URL once and avoids name clashes."""

    def setUp(self) -> None:
        self._fetcher = FakeFetcher({
            'https://example.com/images/alice.png': b'ALICE',
            'https://example.com/images/bob.jpeg': b'BOB',
            'https://example.com/alt_images/bob.jpeg': b'BOBBY',
            'https://example.com/images/carol': b'CAROL',
            'https://example.com/alt_images/carol': b'CAROLINE',
            'https://example.com/other_images/carol': b'CAROLINA'})
        # Write into a scratch directory that is removed after each test,
        # instead of leaving image files behind in the working directory.
        scratch = tempfile.TemporaryDirectory()
        self.addCleanup(scratch.cleanup)
        self._scratch = scratch.name

    def _assert_file_contains(self, path: str, expected: bytes) -> None:
        """Assert that the file at path holds exactly the expected bytes."""
        with open(path, 'rb') as f:
            self.assertEqual(f.read(), expected)

    def testFetchOnce(self) -> None:
        root = os.path.join(self._scratch, 'istest_fetch_once')
        store = ImageStore(root, self._fetcher)
        self.assertEqual(self._fetcher.request_count(), 0)
        a1 = store.get_image('https://example.com/images/alice.png')
        self.assertEqual(self._fetcher.request_count(), 1)
        a2 = store.get_image('https://example.com/images/alice.png')
        self.assertEqual(self._fetcher.request_count(), 1)
        self.assertEqual(a1, a2)
        self.assertEqual(a1, os.path.join(root, 'alice.png'))
        self._assert_file_contains(a1, b'ALICE')

        self.assertEqual(self._fetcher.request_count(), 1)
        b1 = store.get_image('https://example.com/images/bob.jpeg')
        self.assertEqual(self._fetcher.request_count(), 2)
        b2 = store.get_image('https://example.com/images/bob.jpeg')
        self.assertEqual(self._fetcher.request_count(), 2)
        self.assertEqual(b1, b2)
        self.assertEqual(b1, os.path.join(root, 'bob.jpeg'))
        self._assert_file_contains(b1, b'BOB')

        # An earlier URL is still served from the store after other fetches.
        a3 = store.get_image('https://example.com/images/alice.png')
        self.assertEqual(self._fetcher.request_count(), 2)
        self.assertEqual(a1, a3)

    def testNameCollision(self) -> None:
        root = os.path.join(self._scratch, 'istest_name_collision')
        store = ImageStore(root, self._fetcher)
        self.assertEqual(self._fetcher.request_count(), 0)
        b1 = store.get_image('https://example.com/images/bob.jpeg')
        self.assertEqual(self._fetcher.request_count(), 1)
        b2 = store.get_image('https://example.com/alt_images/bob.jpeg')
        self.assertEqual(self._fetcher.request_count(), 2)
        self.assertNotEqual(b1, b2)
        self.assertEqual(b1, os.path.join(root, 'bob.jpeg'))
        self.assertEqual(b2, os.path.join(root, 'bob-0000.jpeg'))
        self._assert_file_contains(b1, b'BOB')
        self._assert_file_contains(b2, b'BOBBY')

        # Names without an extension get the numeric suffix appended.
        self.assertEqual(self._fetcher.request_count(), 2)
        c1 = store.get_image('https://example.com/images/carol')
        self.assertEqual(self._fetcher.request_count(), 3)
        c2 = store.get_image('https://example.com/alt_images/carol')
        self.assertEqual(self._fetcher.request_count(), 4)
        c3 = store.get_image('https://example.com/other_images/carol')
        self.assertEqual(self._fetcher.request_count(), 5)
        self.assertNotEqual(c1, c2)
        self.assertNotEqual(c2, c3)
        self.assertNotEqual(c1, c3)
        self.assertEqual(c1, os.path.join(root, 'carol'))
        self.assertEqual(c2, os.path.join(root, 'carol-0000'))
        self.assertEqual(c3, os.path.join(root, 'carol-0001'))
        self._assert_file_contains(c1, b'CAROL')
        self._assert_file_contains(c2, b'CAROLINE')
        self._assert_file_contains(c3, b'CAROLINA')


if __name__ == '__main__':
    unittest.main()