From: Scott Worley
Date: Tue, 19 Dec 2023 10:23:48 +0000 (-0800)
Subject: Contemplate generating LaTeX directly
X-Git-Url: http://git.scottworley.com/paperdoorknob/commitdiff_plain/f1dec72014657ab33bffa8f68064d85cc862ea65?hp=79631507ecb69945b7cedb10c3aaed4b8dd788b0

Contemplate generating LaTeX directly
---

diff --git a/paperdoorknob_test.py b/paperdoorknob_test.py
index 453f5a6..4cead97 100644
--- a/paperdoorknob_test.py
+++ b/paperdoorknob_test.py
@@ -11,7 +11,7 @@ import subprocess
 import paperdoorknob
 from testing.fakeserver import FakeGlowficServer
 from fetch import DirectFetcher
-from texify import PandocTexifier
+from texify import DirectTexifier, PandocTexifier, VerifyingTexifier
 
 TIMEOUT = 8
 
@@ -44,6 +44,14 @@ Pretty sure.
 \\end{document}
 ''')
 
+    def testDirectTexifier(self) -> None:
+        texifier = VerifyingTexifier(
+            PandocTexifier('pandoc'), DirectTexifier())
+        with DirectFetcher(TIMEOUT) as f:
+            buf = io.BytesIO()
+            paperdoorknob.process(
+                f"http://localhost:{self._port}", f, texifier, buf)
+
     def testPDF(self) -> None:
         texifier = PandocTexifier('pandoc')
         with DirectFetcher(TIMEOUT) as f:
diff --git a/texify.py b/texify.py
index 4fb1557..83a8c67 100644
--- a/texify.py
+++ b/texify.py
@@ -6,6 +6,7 @@
 
 
 from abc import ABC, abstractmethod
+import re
 import subprocess
 
 from bs4.element import Tag
@@ -27,3 +28,42 @@ class PandocTexifier(Texifier):
             input=html.encode(),
             stdout=subprocess.PIPE,
             check=True).stdout
+
+
+class TexifierError(Exception):
+    pass
+
+
+class DirectTexifier(Texifier):
+    def _texify_children(self, html: Tag) -> bytes:
+        out = b''
+        for c in html.children:
+            if isinstance(c, str):
+                out += c.encode('UTF-8')
+            elif isinstance(c, Tag):
+                out += self.texify(c).strip()
+            else:
+                raise TexifierError(f"Unsupported PageElement: {type(c)}")
+        return re.sub(b'[ \n]+', b' ', out).strip() + b'\n'
+
+    def texify(self, html: Tag) -> bytes:
+        if html.name == 'em':
+            return b'\\emph{' + self._texify_children(html).strip() + b'}\n'
+        return self._texify_children(html)
+
+
+class TexifierVerificationError(Exception):
+    pass
+
+
+class VerifyingTexifier(Texifier):
+    def __init__(self, a: Texifier, b: Texifier) -> None:
+        self._a = a
+        self._b = b
+
+    def texify(self, html: Tag) -> bytes:
+        aout = self._a.texify(html)
+        bout = self._b.texify(html)
+        if aout != bout:
+            raise TexifierVerificationError(aout, bout)
+        return aout