]>
Commit | Line | Data |
---|---|---|
1 | # paperdoorknob: Print glowfic | |
2 | # | |
3 | # This program is free software: you can redistribute it and/or modify it | |
4 | # under the terms of the GNU General Public License as published by the | |
5 | # Free Software Foundation, version 3. | |
6 | ||
7 | ||
8 | from abc import ABC, abstractmethod | |
9 | import re | |
10 | import subprocess | |
11 | ||
12 | from bs4.element import Tag | |
13 | ||
14 | ||
class Texifier(ABC):
    """Abstract interface for objects that turn an HTML element into LaTeX."""

    @abstractmethod
    def texify(self, html: Tag) -> bytes:
        """Return the LaTeX rendering of ``html`` as bytes.

        Concrete subclasses must override this method; the base
        implementation only raises ``NotImplementedError``.
        """
        raise NotImplementedError()
19 | ||
20 | ||
class PandocTexifier(Texifier):
    """Texifier that delegates the HTML-to-LaTeX conversion to pandoc."""

    def __init__(self, pandoc_path: str) -> None:
        """Remember ``pandoc_path``, the pandoc executable to invoke."""
        self._pandoc_path = pandoc_path

    def texify(self, html: Tag) -> bytes:
        """Feed the encoded element to pandoc and return what it prints.

        The element is sent on pandoc's standard input and the bytes
        pandoc writes to standard output are returned unchanged.

        Raises:
            subprocess.CalledProcessError: pandoc exited with a non-zero
                status (a consequence of ``check=True``).
        """
        command = [self._pandoc_path, '--from=html', '--to=latex']
        completed = subprocess.run(
            command,
            input=html.encode(),
            stdout=subprocess.PIPE,
            check=True,
        )
        return completed.stdout
31 | ||
32 | ||
class TexifierError(Exception):
    """Raised when a texifier meets input it does not know how to convert."""
35 | ||
36 | ||
class DirectTexifier(Texifier):
    """Texifier that walks the HTML tree itself and emits LaTeX directly.

    Text children are emitted as their UTF-8 encoding, ``em`` elements are
    wrapped in ``\\emph{...}``, and every other element contributes only
    the rendering of its children.
    """

    def _texify_children(self, html: Tag) -> bytes:
        """Render all children of ``html`` and normalise the whitespace.

        Runs of spaces and newlines in the concatenated output collapse to
        a single space; the result is stripped and terminated with exactly
        one newline.

        Raises:
            TexifierError: a child is neither a string nor a ``Tag``.
        """
        pieces = []
        for child in html.children:
            if isinstance(child, str):
                pieces.append(child.encode('UTF-8'))
                continue
            if isinstance(child, Tag):
                # Nested output is stripped so that its trailing newline
                # does not leak into the middle of the parent's text.
                pieces.append(self.texify(child).strip())
                continue
            raise TexifierError(f"Unsupported PageElement: {type(child)}")
        joined = b''.join(pieces)
        collapsed = re.sub(b'[ \n]+', b' ', joined)
        return collapsed.strip() + b'\n'

    def texify(self, html: Tag) -> bytes:
        """Return the LaTeX for ``html``, newline-terminated."""
        is_emphasis = html.name == 'em'
        body = self._texify_children(html)
        if not is_emphasis:
            return body
        return b'\\emph{' + body.strip() + b'}\n'
53 | ||
54 | ||
class TexifierVerificationError(Exception):
    """Raised when two texifiers disagree about the output for one input."""
57 | ||
58 | ||
class VerifyingTexifier(Texifier):
    """Texifier that cross-checks two other texifiers against each other."""

    def __init__(self, a: Texifier, b: Texifier) -> None:
        """Store the two texifiers; ``a`` supplies the returned output."""
        self._a = a
        self._b = b

    def texify(self, html: Tag) -> bytes:
        """Convert ``html`` with both texifiers and insist that they agree.

        Returns:
            The output of the first texifier.

        Raises:
            TexifierVerificationError: the two outputs differ; both outputs
                are passed as the exception's arguments, first texifier's
                output first.
        """
        primary = self._a.texify(html)
        secondary = self._b.texify(html)
        if primary == secondary:
            return primary
        raise TexifierVerificationError(primary, secondary)