]> git.scottworley.com Git - paperdoorknob/blob - texify.py
83a8c671087d47fee49e25553be568032cf2f2c5
[paperdoorknob] / texify.py
1 # paperdoorknob: Print glowfic
2 #
3 # This program is free software: you can redistribute it and/or modify it
4 # under the terms of the GNU General Public License as published by the
5 # Free Software Foundation, version 3.
6
7
8 from abc import ABC, abstractmethod
9 import re
10 import subprocess
11
12 from bs4.element import Tag
13
14
class Texifier(ABC):
    """Abstract interface for objects that turn an HTML element into LaTeX."""

    @abstractmethod
    def texify(self, html: Tag) -> bytes:
        """Return the LaTeX rendering of *html* as bytes.

        Concrete subclasses must override this method; the base
        implementation only raises NotImplementedError.
        """
        raise NotImplementedError()
19
20
class PandocTexifier(Texifier):
    """Texifier that delegates the HTML-to-LaTeX conversion to pandoc."""

    def __init__(self, pandoc_path: str) -> None:
        """Remember *pandoc_path*, the executable to invoke for conversion."""
        self._pandoc_path = pandoc_path

    def texify(self, html: Tag) -> bytes:
        """Pipe the encoded *html* through pandoc and return its stdout.

        Raises subprocess.CalledProcessError when pandoc exits with a
        non-zero status (check=True).
        """
        command = [self._pandoc_path, '--from=html', '--to=latex']
        completed = subprocess.run(
            command,
            input=html.encode(),
            stdout=subprocess.PIPE,
            check=True,
        )
        return completed.stdout
31
32
class TexifierError(Exception):
    """Error raised when HTML content cannot be converted to LaTeX."""
35
36
class DirectTexifier(Texifier):
    """Texifier that converts HTML to LaTeX in-process.

    Only ``em`` elements receive special treatment (they become
    ``\\emph{...}``); every other tag is transparent and contributes just
    the rendering of its children.

    NOTE: text nodes are encoded verbatim; no LaTeX escaping is applied.
    """

    # Runs of spaces and newlines are collapsed to one space.  Compiled once
    # here rather than looked up on every call.
    _WHITESPACE_RUN = re.compile(rb'[ \n]+')

    def _texify_children(self, html: Tag) -> bytes:
        """Render the children of *html* and normalize whitespace.

        String children are UTF-8 encoded as-is; Tag children are rendered
        recursively through texify() and stripped.  The concatenated
        result has space/newline runs collapsed, is stripped, and ends with
        exactly one newline.

        Raises TexifierError for a child that is neither a str nor a Tag.
        """
        # Collect the pieces and join once: repeated ``bytes +=`` copies the
        # accumulated buffer on every iteration.
        parts = []
        for child in html.children:
            if isinstance(child, str):
                parts.append(child.encode('UTF-8'))
            elif isinstance(child, Tag):
                parts.append(self.texify(child).strip())
            else:
                raise TexifierError(
                    f"Unsupported PageElement: {type(child)}")
        joined = b''.join(parts)
        return self._WHITESPACE_RUN.sub(b' ', joined).strip() + b'\n'

    def texify(self, html: Tag) -> bytes:
        """Return the LaTeX for *html*, wrapping ``em`` content in \\emph."""
        if html.name == 'em':
            return b'\\emph{' + self._texify_children(html).strip() + b'}\n'
        return self._texify_children(html)
53
54
class TexifierVerificationError(Exception):
    """Error raised when two texifiers disagree on the same input."""
57
58
class VerifyingTexifier(Texifier):
    """Texifier that cross-checks two other texifiers against each other."""

    def __init__(self, a: Texifier, b: Texifier) -> None:
        """Store the two texifiers whose outputs will be compared."""
        self._a = a
        self._b = b

    def texify(self, html: Tag) -> bytes:
        """Render *html* with both texifiers and return the shared result.

        Raises TexifierVerificationError, carrying both outputs, when the
        two renderings are not equal.
        """
        first = self._a.texify(html)
        second = self._b.texify(html)
        if first == second:
            return first
        raise TexifierVerificationError(first, second)
69 return aout