# paperdoorknob: Print glowfic
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation, version 3.

from abc import ABC, abstractmethod
import re
import subprocess

from bs4.element import Tag


# Runs of spaces and/or newlines; each run is collapsed to a single space.
# Compiled once at import time rather than looked up on every call.
_WHITESPACE_RUN = re.compile(b'[ \n]+')


class Texifier(ABC):
    """Interface for converting a parsed HTML element into LaTeX bytes."""

    @abstractmethod
    def texify(self, html: Tag) -> bytes:
        """Return the LaTeX rendering of ``html`` as bytes."""
        raise NotImplementedError()


class PandocTexifier(Texifier):
    """Texifier that delegates the conversion to an external pandoc binary."""

    def __init__(self, pandoc_path: str) -> None:
        """Remember the path of the pandoc executable to invoke."""
        self._pandoc_path = pandoc_path

    def texify(self, html: Tag) -> bytes:
        """Pipe ``html`` through ``pandoc --from=html --to=latex``.

        The encoded element is supplied on stdin and pandoc's stdout is
        returned unchanged.

        Raises:
            subprocess.CalledProcessError: if pandoc exits with a non-zero
                status (because of ``check=True``).
        """
        completed = subprocess.run(
            [self._pandoc_path, '--from=html', '--to=latex'],
            input=html.encode(),
            stdout=subprocess.PIPE,
            check=True)
        return completed.stdout


class TexifierError(Exception):
    """Raised when DirectTexifier meets a child node it cannot convert."""


class DirectTexifier(Texifier):
    """Texifier that walks the element tree and emits LaTeX itself.

    Only ``<em>`` and ``<a href=...>`` receive special markup; every other
    element contributes just the rendering of its children.
    """

    def _texify_children(self, html: Tag) -> bytes:
        """Render the children of ``html``, normalising whitespace.

        String children are UTF-8 encoded as-is; ``Tag`` children are
        rendered recursively via ``texify``. Runs of spaces/newlines in the
        combined output are collapsed to one space, the result is stripped,
        and a single trailing newline is appended.

        Raises:
            TexifierError: if a child is neither a string nor a ``Tag``.
        """
        # NOTE(review): text is emitted without escaping LaTeX special
        # characters -- confirm whether callers rely on that.
        parts = []
        for child in html.children:
            if isinstance(child, str):
                parts.append(child.encode('UTF-8'))
            elif isinstance(child, Tag):
                # Drop the newline the nested rendering ends with so inline
                # elements stay inline.
                parts.append(self.texify(child).strip())
            else:
                raise TexifierError(
                    f"Unsupported PageElement: {type(child)}")
        # Join once instead of repeated bytes += (which is quadratic).
        joined = b''.join(parts)
        return _WHITESPACE_RUN.sub(b' ', joined).strip() + b'\n'

    def texify(self, html: Tag) -> bytes:
        """Return LaTeX for ``html``, terminated by a newline.

        ``<em>`` becomes ``\\emph{...}``; ``<a>`` carrying an ``href``
        attribute becomes ``\\href{url}{...}``; anything else is rendered as
        its children only.
        """
        if html.name == 'em':
            return b'\\emph{' + self._texify_children(html).strip() + b'}\n'
        if html.name == 'a' and 'href' in html.attrs:
            return (b'\\href{' + html.attrs['href'].encode('UTF-8')
                    + b'}{' + self._texify_children(html).strip() + b'}\n')
        return self._texify_children(html)


class TexifierVerificationError(Exception):
    """Raised when two texifiers disagree; args are the two outputs."""


class VerifyingTexifier(Texifier):
    """Texifier that runs two texifiers and requires identical output."""

    def __init__(self, a: Texifier, b: Texifier) -> None:
        """Store the two texifiers whose outputs will be compared."""
        self._a = a
        self._b = b

    def texify(self, html: Tag) -> bytes:
        """Render ``html`` with both texifiers and return the shared result.

        Raises:
            TexifierVerificationError: if the outputs differ; the exception
                carries both outputs (first texifier's, then second's).
        """
        aout = self._a.texify(html)
        bout = self._b.texify(html)
        if aout != bout:
            raise TexifierVerificationError(aout, bout)
        return aout