from dataclasses import dataclass
import itertools
-from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
+from urllib.parse import parse_qsl, urlencode, urljoin, urlparse, urlunparse
from typing import Iterable
for a in c.findChildren('a'):
if 'Next Post' in a.text and 'href' in a.attrs and isinstance(
a.attrs['href'], str):
- return a.attrs['href']
+ return urljoin(spec.url, a.attrs['href'])
return None
spec.log('Fetching HTML...\r')
</body></html>'''))
self.assertEqual(t.title(), 'Teh Story!')
- def testNextThread(self) -> None:
+ def testNextThreadRelative(self) -> None:
+ # Expects next_thread() to return an absolute URL when the "Next Post"
+ # link's href is relative.
+ # NOTE(review): the navheader <a> elements are not visible in this view
+ # of the fixture, and 'https://fake' is presumably the base URL supplied
+ # by spec_for_testing -- confirm both against the full file.
t = Thread(spec_for_testing(b'''
<html><body>
<div class="post-navheader">
</div>
<div class="post-container post-post">The "post"</div>
</body></html>'''))
- self.assertEqual(t.next_thread(), '/posts/4567')
+ self.assertEqual(t.next_thread(), 'https://fake/posts/4567')
+
+ def testNextThreadAbsolute(self) -> None:
+ t = Thread(spec_for_testing(b'''
+ <html><body>
+ <div class="post-navheader">
+ <a class="view-button-link" href="https://elsewhere/posts/4567"><div class="view-button">Next Post »</div>
+ </a><a class="view-button-link" href="https://elsewhere/posts/4321"><div class="view-button float-none">« Previous Post</div>
+ </a>
+ </div>
+ <div class="post-container post-post">The "post"</div>
+ </body></html>'''))
+ self.assertEqual(t.next_thread(), 'https://elsewhere/posts/4567')
if __name__ == '__main__':