import re
import subprocess
from abc import ABC, abstractmethod

from bs4.element import Tag
input=html.encode(),
stdout=subprocess.PIPE,
check=True).stdout
+
+
class TexifierError(Exception):
    """Raised when a texifier meets input it cannot convert to TeX."""
+
+
class DirectTexifier(Texifier):
    r"""Convert an HTML element to TeX by walking its children directly.

    Only ``<em>`` is given a TeX equivalent (``\emph{...}``). Any other tag is
    transparent: its output is simply the converted output of its children.
    String children are encoded as UTF-8 as-is; no TeX escaping is applied.
    """

    # Runs of spaces and newlines collapse to a single space. Other
    # whitespace (tabs, carriage returns) is deliberately left untouched.
    _WHITESPACE_RUN = re.compile(rb'[ \n]+')

    def _texify_children(self, html: Tag) -> bytes:
        """Return the TeX for all children of *html* as one normalised line.

        String children are UTF-8 encoded, ``Tag`` children are converted
        recursively via :meth:`texify` (with surrounding whitespace removed).
        The concatenated result has space/newline runs collapsed, is
        stripped, and is terminated by exactly one newline.

        Raises:
            TexifierError: if a child is neither a ``str`` nor a ``Tag``.
        """
        parts = []
        for child in html.children:
            if isinstance(child, str):
                parts.append(child.encode('UTF-8'))
            elif isinstance(child, Tag):
                parts.append(self.texify(child).strip())
            else:
                raise TexifierError(f"Unsupported PageElement: {type(child)}")
        # Join once instead of growing a bytes object piece by piece.
        joined = b''.join(parts)
        return self._WHITESPACE_RUN.sub(b' ', joined).strip() + b'\n'

    def texify(self, html: Tag) -> bytes:
        r"""Return the TeX rendering of *html*, terminated by a newline.

        An ``<em>`` element becomes ``\emph{<children>}``; every other
        element yields just its converted children.
        """
        body = self._texify_children(html)
        if html.name == 'em':
            # Drop the trailing newline of the children before closing the brace.
            return b'\\emph{' + body.strip() + b'}\n'
        return body
+
+
class TexifierVerificationError(Exception):
    """Raised when two texifiers disagree on the output for the same input."""
+
+
class VerifyingTexifier(Texifier):
    """Texifier that cross-checks two other texifiers against each other.

    Every call runs both wrapped texifiers on the same input and only
    returns a result when their outputs are byte-for-byte identical.
    """

    def __init__(self, a: Texifier, b: Texifier) -> None:
        """Store the two texifiers whose outputs will be compared.

        Args:
            a: Texifier whose output is returned when both agree.
            b: Texifier used as the cross-check.
        """
        self._a = a
        self._b = b

    def texify(self, html: Tag) -> bytes:
        """Return the TeX for *html* after verifying both texifiers agree.

        Both texifiers are always invoked, ``a`` first and then ``b``.

        Raises:
            TexifierVerificationError: if the two outputs differ; the
                exception's ``args`` are ``(a_output, b_output)``.
        """
        first = self._a.texify(html)
        second = self._b.texify(html)
        if first != second:
            raise TexifierVerificationError(first, second)
        return first