# Free Software Foundation, version 3.
+from dataclasses import dataclass
import itertools
from typing import Iterable
from bs4 import BeautifulSoup
from bs4.element import Tag
+from images import ImageStore
+
+
+@dataclass(frozen=True)
+class Chunk:  # Immutable record of the fields makeChunk extracts from one chunk's DOM.
+ icon: str | None  # image_store.get_image() result for the icon's src; None if no icon div/img
+ character: str | None  # stripped text of the 'post-character' div; None if absent
+ screen_name: str | None  # stripped text of the 'post-screenname' div; None if absent
+ author: str | None  # stripped text of the 'post-author' div; None if absent
+ content: Tag  # the chunk's 'post-content' div, kept as a parsed element
+
# We avoid the name "post" because the Glowfic community uses the term
# inconsistently:
# * The Glowfic software sometimes uses "post" to refer to a whole thread
-# (eg: in the URL), but more often uses "post" to refer to just the first
-# chunk in a thread. The non-first chunks are "replies".
+# (in the URL), sometimes uses "post" to refer to chunks (in the CSS),
+# but mostly uses "post" to refer to just the first chunk in a thread
+# (in the HTML and UI). The non-first chunks are "replies".
# * Readers and this software don't need to distinguish first-chunks and
# non-first-chunks.
-# * Humans in the community tend to use "posts" to mean "chunks" ("replies"
-# in the Glowfic software's lexicon).
+# * Humans in the community tend to use "posts" to mean chunks.
def chunkDOMs(html: BeautifulSoup) -> Iterable[Tag]:
return rs
return itertools.chain([text()], the_replies())
+
+
+def makeChunk(chunk_dom: Tag, image_store: ImageStore) -> Chunk:
+
+ def getIcon() -> str | None:
+ icon_div = chunk_dom.find_next('div', class_='post-icon')
+ if icon_div is None:
+ return None
+ icon_img = icon_div.find_next('img')
+ if icon_img is None:
+ return None
+ assert isinstance(icon_img, Tag)
+ return image_store.get_image(icon_img.attrs['src'])
+
+ def getTextByClass(css_class: str) -> str | None:
+ div = chunk_dom.find_next('div', class_=css_class)
+ if div is None:
+ return None
+ return div.text.strip()
+
+ content = chunk_dom.find_next('div', class_='post-content')
+ assert isinstance(content, Tag)
+
+ return Chunk(getIcon(),
+ getTextByClass('post-character'),
+ getTextByClass('post-screenname'),
+ getTextByClass('post-author'),
+ content)
from bs4 import BeautifulSoup
-from glowfic import chunkDOMs
+from images import FakeImageStore
+from glowfic import chunkDOMs, makeChunk
class TestSplit(unittest.TestCase):
[['The "post"'], ['1st reply'], ['2nd reply']])
+class TestMakeChunk(unittest.TestCase):
+
+ def testEmptyContent(self) -> None:
+ with open('testdata/empty-content.html', 'rb') as f:
+ soup = BeautifulSoup(f, 'html.parser')
+ c = makeChunk(next(iter(chunkDOMs(soup))), FakeImageStore())
+ self.assertEqual(
+ c.icon,
+ 'stored:https://d1anwqy6ci9o1i.cloudfront.net/' +
+ 'users%2F366%2Ficons%2Fxqmypqvflgdy28aorw9ml_shock.png')
+ self.assertEqual(c.character, 'Keltham')
+ self.assertEqual(c.screen_name, 'lawful chaotic')
+ self.assertEqual(c.author, 'Iarwain')
+ self.assertEqual(str(c.content),
+ '<div class="post-content"><p></p></div>')
+
+
if __name__ == '__main__':  # run the test suite only when executed as a script
 unittest.main()
--- /dev/null
+<html>
+ <body>
+ <div class="post-container post-post ">
+ <a id="reply-1616124" class="noheight"> </a>
+ <div class="padding-10">
+ <div class="post-info-box">
+ <div class="post-icon">
+ <a href="/icons/200477">
+ <img alt="shock" title="shock" class="icon" src="https://d1anwqy6ci9o1i.cloudfront.net/users%2F366%2Ficons%2Fxqmypqvflgdy28aorw9ml_shock.png">
+ </a>
+ </div>
+ <div class="post-info-text">
+ <div class="post-character">
+ <a href="/characters/11729">Keltham</a>
+ </div>
+ <div class="post-screenname">lawful chaotic</div>
+ <div class="post-author"><a href="/users/366">Iarwain</a></div>
+ </div>
+ </div>
+ <div class="post-edit-box">
+ <a rel="alternate" href="/replies/1616124#reply-1616124">
+ <img title="Permalink" alt="Permalink" src="https://dhtmoj33sf3e0.cloudfront.net/assets/icons/link-bb9df2e290558f33c20c21f4a2a85841eb4ccb1bd09f6266d3e80679f30ccf62.png">
+ </a>
+ </div>
+ <div class="post-content"><p></p></div>
+ </div>
+ <div class="post-footer"><div class="right-align"><div class="padding-5">
+ Posted <span class="post-posted"><time datetime="2021-06-29T02:48:07Z" title="2021-06-29 02:48 UTC">Jun 28, 2021, 7:48 PM</time></span>
+ </div></div></div>
+ </div>
+ </body>
+</html>