From 79631507ecb69945b7cedb10c3aaed4b8dd788b0 Mon Sep 17 00:00:00 2001 From: Scott Worley Date: Tue, 19 Dec 2023 01:45:20 -0800 Subject: [PATCH] Texifier interface --- paperdoorknob.py | 18 ++++++------------ paperdoorknob_test.py | 7 +++++-- setup.py | 1 + texify.py | 29 +++++++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 14 deletions(-) create mode 100644 texify.py diff --git a/paperdoorknob.py b/paperdoorknob.py index 31ce02c..3b86cb8 100644 --- a/paperdoorknob.py +++ b/paperdoorknob.py @@ -8,7 +8,6 @@ from argparse import ArgumentParser import itertools import os.path -import subprocess from typing import IO, Iterable @@ -17,6 +16,7 @@ from bs4.element import Tag from xdg_base_dirs import xdg_cache_home from fetch import CachingFetcher, Fetcher +from texify import PandocTexifier, Texifier def command_line_parser() -> ArgumentParser: @@ -68,30 +68,24 @@ def replies(html: BeautifulSoup) -> Iterable[Tag]: return itertools.chain([text()], the_replies()) -def html_to_tex(pandoc: str, tag: Tag) -> bytes: - return subprocess.run([pandoc, '--from=html', '--to=latex'], - input=tag.encode(), - stdout=subprocess.PIPE, - check=True).stdout - - def process( url: str, fetcher: Fetcher, - texout: IO[bytes], - pandoc: str) -> None: + texifier: Texifier, + texout: IO[bytes]) -> None: texout.write(b'\\documentclass{article}\n\\begin{document}\n') html = clean(parse(fetcher.fetch(url))) for r in replies(html): - texout.write(html_to_tex(pandoc, r)) + texout.write(texifier.texify(r)) texout.write(b'\\end{document}\n') def main() -> None: args = command_line_parser().parse_args() + texifier = PandocTexifier(args.pandoc or 'pandoc') with CachingFetcher(args.cache_path, args.timeout) as fetcher: with open(args.out + '.tex', 'wb') as texout: - process(args.url, fetcher, texout, args.pandoc or 'pandoc') + process(args.url, fetcher, texifier, texout) if __name__ == '__main__': diff --git a/paperdoorknob_test.py b/paperdoorknob_test.py index 9676e28..453f5a6 100644 --- a/paperdoorknob_test.py +++ b/paperdoorknob_test.py @@ -11,6 +11,7 @@ import subprocess import paperdoorknob from testing.fakeserver import FakeGlowficServer from fetch import DirectFetcher +from texify import PandocTexifier TIMEOUT = 8 @@ -30,10 +31,11 @@ class TestPaperDoorknob(unittest.TestCase): ["This is glowfic", "You sure?", "Pretty sure."]) def testProcess(self) -> None: + texifier = PandocTexifier('pandoc') with DirectFetcher(TIMEOUT) as f: buf = io.BytesIO() paperdoorknob.process( - f"http://localhost:{self._port}", f, buf, 'pandoc') + f"http://localhost:{self._port}", f, texifier, buf) self.assertEqual(buf.getvalue(), b'''\\documentclass{article} \\begin{document} This is glowfic @@ -43,10 +45,11 @@ Pretty sure. ''') def testPDF(self) -> None: + texifier = PandocTexifier('pandoc') with DirectFetcher(TIMEOUT) as f: with open("test.tex", 'wb') as out: paperdoorknob.process( - f"http://localhost:{self._port}", f, out, 'pandoc') + f"http://localhost:{self._port}", f, texifier, out) subprocess.run(['pdflatex', 'test.tex'], stdin=subprocess.DEVNULL, check=True) diff --git a/setup.py b/setup.py index 27980a1..6464879 100644 --- a/setup.py +++ b/setup.py @@ -10,6 +10,7 @@ setup( py_modules=[ 'fetch', 'paperdoorknob', + 'texify', ], license="GPL-3.0", entry_points={'console_scripts': ['paperdoorknob = paperdoorknob:main']}, diff --git a/texify.py b/texify.py new file mode 100644 index 0000000..4fb1557 --- /dev/null +++ b/texify.py @@ -0,0 +1,29 @@ +# paperdoorknob: Print glowfic +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, version 3. + + +from abc import ABC, abstractmethod +import subprocess + +from bs4.element import Tag + + +class Texifier(ABC): + @abstractmethod + def texify(self, html: Tag) -> bytes: + raise NotImplementedError() + + +class PandocTexifier(Texifier): + + def __init__(self, pandoc_path: str) -> None: + self._pandoc_path = pandoc_path + + def texify(self, html: Tag) -> bytes: + return subprocess.run([self._pandoc_path, '--from=html', '--to=latex'], + input=html.encode(), + stdout=subprocess.PIPE, + check=True).stdout -- 2.44.1