]> git.scottworley.com Git - paperdoorknob/commitdiff
ImageStore
author Scott Worley <scottworley@scottworley.com>
Wed, 20 Dec 2023 07:39:34 +0000 (23:39 -0800)
committer Scott Worley <scottworley@scottworley.com>
Wed, 20 Dec 2023 07:41:28 +0000 (23:41 -0800)
fetch.py
images.py [new file with mode: 0644]
images_test.py [new file with mode: 0644]
setup.py

index 7776f93085943ef8f04948858eeadb0d69323d86..d5267534a100e1d0e527ef347547cb57b2ae6f31 100644 (file)
--- a/fetch.py
+++ b/fetch.py
@@ -47,8 +47,13 @@ class FakeFetcher(Fetcher):
 
     def __init__(self, resources: dict[str, bytes]) -> None:
+        # resources maps each URL this fake can serve to the bytes
+        # that fetch() returns for it.
         self._resources = resources
+        # Number of fetch() calls made so far; reported by request_count().
+        self._fetch_count = 0
 
     def fetch(self, url: str) -> bytes:
+        # Count every call, including ones that raise below, so tests can
+        # tell whether a caller went to the fetcher at all.
+        self._fetch_count += 1
         if url not in self._resources:
             raise requests.HTTPError("URL not found")
         return self._resources[url]
+
+    def request_count(self) -> int:
+        """Return how many times fetch() has been called on this fetcher."""
+        return self._fetch_count
diff --git a/images.py b/images.py
new file mode 100644 (file)
index 0000000..178f708
--- /dev/null
+++ b/images.py
@@ -0,0 +1,48 @@
+# paperdoorknob: Print glowfic
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, version 3.
+
+import os
+import os.path
+
+from fetch import Fetcher
+
+
+class ImageStoreError(Exception):
+    """Raised when ImageStore cannot choose a unique filename for an image."""
+
+
+class ImageStore:
+    """Download images via a Fetcher and keep them as files in one directory.
+
+    Each distinct URL is fetched at most once per ImageStore; later requests
+    for the same URL return the path of the file written the first time.
+    Images whose URLs end in the same filename get distinct local names.
+    """
+
+    # Used when a URL's path yields no usable filename (e.g. it ends in '/').
+    _DEFAULT_NAME = 'image'
+
+    def __init__(self, root_path: str, fetcher: Fetcher) -> None:
+        """Create a store that writes under root_path and reads via fetcher.
+
+        root_path is not touched here; it is created on the first download.
+        """
+        self._root_path = root_path
+        self._fetcher = fetcher
+        # URL -> path of the local copy, for every URL fetched so far.
+        self._images: dict[str, str] = {}
+        # Filenames this store has already used under root_path.
+        self._filenames: set[str] = set()
+
+    def _filename(self, url: str) -> str:
+        """Pick a not-yet-used local filename for url.
+
+        The name is the last path component of the URL with any query
+        string and fragment removed.  If that name is already taken, a
+        four-digit counter is inserted before the extension
+        (bob.jpeg, bob-0000.jpeg, bob-0001.jpeg, ...).
+
+        Raises ImageStoreError if all 10000 numbered variants are taken.
+        """
+        assert url not in self._images
+        # '?size=large' and '#top' are not part of the file's name; left in
+        # place they would end up inside (or instead of) the extension.
+        resource = url.partition('#')[0].partition('?')[0]
+        base = os.path.basename(resource)
+        # A URL ending in '/', '/.' or '/..' would otherwise name the store
+        # directory itself (or its parent) rather than a file inside it.
+        if base in ('', '.', '..'):
+            base = self._DEFAULT_NAME
+        if base not in self._filenames:
+            return base
+        stem, ext = os.path.splitext(base)
+        for i in range(10000):
+            name = f"{stem}-{i:04d}{ext}"
+            if name not in self._filenames:
+                return name
+        raise ImageStoreError(
+            'Unexpectedly-many similarly-named images fetched?')
+
+    def get_image(self, url: str) -> str:
+        """Return the local path of the image at url, downloading if needed.
+
+        Exceptions raised by the fetcher or while writing the file
+        propagate to the caller.  In that case nothing is recorded for
+        url, so a later call will try the download again.
+        """
+        if url in self._images:
+            return self._images[url]
+        image_data = self._fetcher.fetch(url)
+        name = self._filename(url)
+        path = os.path.join(self._root_path, name)
+        os.makedirs(self._root_path, exist_ok=True)
+        with open(path, "wb") as f:
+            f.write(image_data)
+        # Only claim the name and cache the URL once the file is on disk.
+        self._filenames.add(name)
+        self._images[url] = path
+        return path
diff --git a/images_test.py b/images_test.py
new file mode 100644 (file)
index 0000000..8ada1ee
--- /dev/null
+++ b/images_test.py
@@ -0,0 +1,87 @@
+# paperdoorknob: Print glowfic
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, version 3.
+
+
+import os.path
+import tempfile
+import unittest
+
+from fetch import FakeFetcher
+from images import ImageStore
+
+
+class TestImageStore(unittest.TestCase):
+    """Tests for ImageStore's fetch-once caching and filename de-duplication.
+
+    Every test writes into its own temporary directory, which is removed
+    when the test finishes, so no files are left in the working directory.
+    """
+
+    def setUp(self) -> None:
+        self._fetcher = FakeFetcher({
+            'https://example.com/images/alice.png': b'ALICE',
+            'https://example.com/images/bob.jpeg': b'BOB',
+            'https://example.com/alt_images/bob.jpeg': b'BOBBY',
+            'https://example.com/images/carol': b'CAROL',
+            'https://example.com/alt_images/carol': b'CAROLINE',
+            'https://example.com/other_images/carol': b'CAROLINA'})
+        tmp = tempfile.TemporaryDirectory()
+        self.addCleanup(tmp.cleanup)
+        self._tmp = tmp.name
+
+    def testFetchOnce(self) -> None:
+        # A not-yet-existing subdirectory, so the store has to create it.
+        root = os.path.join(self._tmp, 'istest_fetch_once')
+        store = ImageStore(root, self._fetcher)
+        self.assertEqual(self._fetcher.request_count(), 0)
+        a1 = store.get_image('https://example.com/images/alice.png')
+        self.assertEqual(self._fetcher.request_count(), 1)
+        a2 = store.get_image('https://example.com/images/alice.png')
+        self.assertEqual(self._fetcher.request_count(), 1)
+        self.assertEqual(a1, a2)
+        self.assertEqual(a1, os.path.join(root, 'alice.png'))
+        with open(a1, 'rb') as f:
+            self.assertEqual(f.read(), b'ALICE')
+
+        self.assertEqual(self._fetcher.request_count(), 1)
+        b1 = store.get_image('https://example.com/images/bob.jpeg')
+        self.assertEqual(self._fetcher.request_count(), 2)
+        b2 = store.get_image('https://example.com/images/bob.jpeg')
+        self.assertEqual(self._fetcher.request_count(), 2)
+        self.assertEqual(b1, b2)
+        self.assertEqual(b1, os.path.join(root, 'bob.jpeg'))
+        with open(b1, 'rb') as f:
+            self.assertEqual(f.read(), b'BOB')
+
+        # Going back to the first URL must still be served from the cache.
+        a3 = store.get_image('https://example.com/images/alice.png')
+        self.assertEqual(self._fetcher.request_count(), 2)
+        self.assertEqual(a1, a3)
+
+    def testNameCollision(self) -> None:
+        root = os.path.join(self._tmp, 'istest_name_collision')
+        store = ImageStore(root, self._fetcher)
+        self.assertEqual(self._fetcher.request_count(), 0)
+        b1 = store.get_image('https://example.com/images/bob.jpeg')
+        self.assertEqual(self._fetcher.request_count(), 1)
+        b2 = store.get_image('https://example.com/alt_images/bob.jpeg')
+        self.assertEqual(self._fetcher.request_count(), 2)
+        self.assertNotEqual(b1, b2)
+        self.assertEqual(b1, os.path.join(root, 'bob.jpeg'))
+        self.assertEqual(b2, os.path.join(root, 'bob-0000.jpeg'))
+        with open(b1, 'rb') as f:
+            self.assertEqual(f.read(), b'BOB')
+        with open(b2, 'rb') as f:
+            self.assertEqual(f.read(), b'BOBBY')
+
+        # Same again for names without an extension.
+        self.assertEqual(self._fetcher.request_count(), 2)
+        c1 = store.get_image('https://example.com/images/carol')
+        self.assertEqual(self._fetcher.request_count(), 3)
+        c2 = store.get_image('https://example.com/alt_images/carol')
+        self.assertEqual(self._fetcher.request_count(), 4)
+        c3 = store.get_image('https://example.com/other_images/carol')
+        self.assertEqual(self._fetcher.request_count(), 5)
+        self.assertNotEqual(c1, c2)
+        self.assertNotEqual(c2, c3)
+        self.assertNotEqual(c1, c3)
+        self.assertEqual(c1, os.path.join(root, 'carol'))
+        self.assertEqual(c2, os.path.join(root, 'carol-0000'))
+        self.assertEqual(c3, os.path.join(root, 'carol-0001'))
+        with open(c1, 'rb') as f:
+            self.assertEqual(f.read(), b'CAROL')
+        with open(c2, 'rb') as f:
+            self.assertEqual(f.read(), b'CAROLINE')
+        with open(c3, 'rb') as f:
+            self.assertEqual(f.read(), b'CAROLINA')
+
+
+# Run the tests when this file is executed directly as a script.
+if __name__ == '__main__':
+    unittest.main()
index 948b6bc5f3a79544f2411bc22df6509c27105f4d..1ef11e6e9ccebbd71f57eb57453bc64cecc3b1c4 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -12,6 +12,7 @@ setup(
         'domfilter',
         'fetch',
         'htmlfilter',
+        'images',
         'paperdoorknob',
         'spec',
         'texify',