git.scottworley.com Git - paperdoorknob/commitdiff
Fetch the non-flat view to get the next-thread link
author Scott Worley <scottworley@scottworley.com>
Fri, 12 Jan 2024 12:05:14 +0000 (04:05 -0800)
committer Scott Worley <scottworley@scottworley.com>
Fri, 12 Jan 2024 12:05:14 +0000 (04:05 -0800)
glowfic.py
glowfic_test.py
paperdoorknob_test.py

index 5ffaac31921e360961967832ad090074e5cd2aaa..81a5342c2621d434dd320eb2104fb98d8eb34841 100644 (file)
@@ -58,10 +58,21 @@ class Chunk:
 class Thread:
 
     def __init__(self, spec: Spec) -> None:
 class Thread:
 
     def __init__(self, spec: Spec) -> None:
+        def find_next_thread(dom: BeautifulSoup) -> str | None:
+            for c in dom.findChildren('div', class_='post-navheader'):
+                for a in c.findChildren('a'):
+                    if 'Next Post' in a.text and 'href' in a.attrs and isinstance(
+                            a.attrs['href'], str):
+                        return a.attrs['href']
+            return None
+
         spec.log('Fetching HTML...\r')
         spec.log('Fetching HTML...\r')
-        html = spec.fetcher.fetch(flatURL(spec.url))
+        html = spec.fetcher.fetch(spec.url)
+        flat_html = spec.fetcher.fetch(flatURL(spec.url))
         spec.log('Parsing HTML...\r')
         spec.log('Parsing HTML...\r')
-        self._dom = BeautifulSoup(spec.htmlfilter(html), 'html.parser')
+        self._next_thread = find_next_thread(
+            BeautifulSoup(spec.htmlfilter(html), 'html.parser'))
+        self._dom = BeautifulSoup(spec.htmlfilter(flat_html), 'html.parser')
 
     def title(self) -> str | None:
         span = self._dom.findChild("span", id="post-title")
 
     def title(self) -> str | None:
         span = self._dom.findChild("span", id="post-title")
@@ -69,6 +80,9 @@ class Thread:
             return None
         return span.text.strip()
 
             return None
         return span.text.strip()
 
+    def next_thread(self) -> str | None:
+        return self._next_thread
+
     def chunkDOMs(self) -> Iterable[Tag]:
         def text() -> Tag:
             body = self._dom.body
     def chunkDOMs(self) -> Iterable[Tag]:
         def text() -> Tag:
             body = self._dom.body
index 52d6f86cb9a1681ccaf4d34e3784d054e5352ed5..68debf91cc82a719bec4f3f2ed391db6dc25f5e6 100644 (file)
@@ -17,7 +17,7 @@ from texify import PandocTexifier
 
 def spec_for_testing(html: bytes) -> Spec:
     return Spec('test',
 
 def spec_for_testing(html: bytes) -> Spec:
     return Spec('test',
-                FakeFetcher({'test?view=flat': html}),
+                FakeFetcher({'test': html, 'test?view=flat': html}),
                 FakeImageStore(),
                 lambda x: x,
                 lambda x: None,
                 FakeImageStore(),
                 lambda x: x,
                 lambda x: None,
@@ -102,6 +102,18 @@ class TestThread(unittest.TestCase):
             </body></html>'''))
         self.assertEqual(t.title(), 'Teh Story!')
 
             </body></html>'''))
         self.assertEqual(t.title(), 'Teh Story!')
 
+    def testNextThread(self) -> None:
+        t = Thread(spec_for_testing(b'''
+            <html><body>
+              <div class="post-navheader">
+                <a class="view-button-link" href="/posts/4567"><div class="view-button">Next Post &raquo;</div>
+                </a><a class="view-button-link" href="/posts/4321"><div class="view-button float-none">&laquo; Previous Post</div>
+                </a>
+              </div>
+              <div class="post-container post-post">The "post"</div>
+            </body></html>'''))
+        self.assertEqual(t.next_thread(), '/posts/4567')
+
 
 if __name__ == '__main__':
     unittest.main()
 
 if __name__ == '__main__':
     unittest.main()
index f2cd5f1115e21576d9f771eacfd03d130b93082d..571dda6847abb40b59855b9c90d81d28ce12f95a 100644 (file)
@@ -121,7 +121,8 @@ class TestProcessFromFakeFetcher(BaseTestProcess, unittest.TestCase):
 
     def fetcher(self) -> Fetcher:
         with open('testdata/this-is-glowfic.html', 'rb') as f:
 
     def fetcher(self) -> Fetcher:
         with open('testdata/this-is-glowfic.html', 'rb') as f:
-            return FakeFetcher({'fic?view=flat': f.read(9999)})
+            html = f.read(9999)
+            return FakeFetcher({'fic': html, 'fic?view=flat': html})
 
 
 if __name__ == '__main__':
 
 
 if __name__ == '__main__':