git.scottworley.com Git - paperdoorknob/commitdiff
Texifier interface
author Scott Worley <scottworley@scottworley.com>
Tue, 19 Dec 2023 09:45:20 +0000 (01:45 -0800)
committer Scott Worley <scottworley@scottworley.com>
Wed, 20 Dec 2023 04:10:43 +0000 (20:10 -0800)
paperdoorknob.py
paperdoorknob_test.py
setup.py
texify.py [new file with mode: 0644]

index 31ce02c494ea29aadc8794d75f11ee8397729a2c..3b86cb8da08144c3862ab80b757b0194c086dbad 100644 (file)
--- a/paperdoorknob.py
+++ b/paperdoorknob.py
@@ -8,7 +8,6 @@
 from argparse import ArgumentParser
 import itertools
 import os.path
 from argparse import ArgumentParser
 import itertools
 import os.path
-import subprocess
 
 from typing import IO, Iterable
 
 
 from typing import IO, Iterable
 
@@ -17,6 +16,7 @@ from bs4.element import Tag
 from xdg_base_dirs import xdg_cache_home
 
 from fetch import CachingFetcher, Fetcher
 from xdg_base_dirs import xdg_cache_home
 
 from fetch import CachingFetcher, Fetcher
+from texify import PandocTexifier, Texifier
 
 
 def command_line_parser() -> ArgumentParser:
 
 
 def command_line_parser() -> ArgumentParser:
@@ -68,30 +68,24 @@ def replies(html: BeautifulSoup) -> Iterable[Tag]:
     return itertools.chain([text()], the_replies())
 
 
     return itertools.chain([text()], the_replies())
 
 
-def html_to_tex(pandoc: str, tag: Tag) -> bytes:
-    return subprocess.run([pandoc, '--from=html', '--to=latex'],
-                          input=tag.encode(),
-                          stdout=subprocess.PIPE,
-                          check=True).stdout
-
-
 def process(
         url: str,
         fetcher: Fetcher,
 def process(
         url: str,
         fetcher: Fetcher,
-        texout: IO[bytes],
-        pandoc: str) -> None:
+        texifier: Texifier,
+        texout: IO[bytes]) -> None:
     texout.write(b'\\documentclass{article}\n\\begin{document}\n')
     html = clean(parse(fetcher.fetch(url)))
     for r in replies(html):
     texout.write(b'\\documentclass{article}\n\\begin{document}\n')
     html = clean(parse(fetcher.fetch(url)))
     for r in replies(html):
-        texout.write(html_to_tex(pandoc, r))
+        texout.write(texifier.texify(r))
     texout.write(b'\\end{document}\n')
 
 
 def main() -> None:
     args = command_line_parser().parse_args()
     texout.write(b'\\end{document}\n')
 
 
 def main() -> None:
     args = command_line_parser().parse_args()
+    texifier = PandocTexifier(args.pandoc or 'pandoc')
     with CachingFetcher(args.cache_path, args.timeout) as fetcher:
         with open(args.out + '.tex', 'wb') as texout:
     with CachingFetcher(args.cache_path, args.timeout) as fetcher:
         with open(args.out + '.tex', 'wb') as texout:
-            process(args.url, fetcher, texout, args.pandoc or 'pandoc')
+            process(args.url, fetcher, texifier, texout)
 
 
 if __name__ == '__main__':
 
 
 if __name__ == '__main__':
index 9676e286aaae11eb1959cb5868af85e6f3681d25..453f5a6338dfe5513b3e2cf68160786a4deb72c1 100644 (file)
--- a/paperdoorknob_test.py
+++ b/paperdoorknob_test.py
@@ -11,6 +11,7 @@ import subprocess
 import paperdoorknob
 from testing.fakeserver import FakeGlowficServer
 from fetch import DirectFetcher
 import paperdoorknob
 from testing.fakeserver import FakeGlowficServer
 from fetch import DirectFetcher
+from texify import PandocTexifier
 
 TIMEOUT = 8
 
 
 TIMEOUT = 8
 
@@ -30,10 +31,11 @@ class TestPaperDoorknob(unittest.TestCase):
                              ["This is glowfic", "You sure?", "Pretty sure."])
 
     def testProcess(self) -> None:
                              ["This is glowfic", "You sure?", "Pretty sure."])
 
     def testProcess(self) -> None:
+        texifier = PandocTexifier('pandoc')
         with DirectFetcher(TIMEOUT) as f:
             buf = io.BytesIO()
             paperdoorknob.process(
         with DirectFetcher(TIMEOUT) as f:
             buf = io.BytesIO()
             paperdoorknob.process(
-                f"http://localhost:{self._port}", f, buf, 'pandoc')
+                f"http://localhost:{self._port}", f, texifier, buf)
             self.assertEqual(buf.getvalue(), b'''\\documentclass{article}
 \\begin{document}
 This is glowfic
             self.assertEqual(buf.getvalue(), b'''\\documentclass{article}
 \\begin{document}
 This is glowfic
@@ -43,10 +45,11 @@ Pretty sure.
 ''')
 
     def testPDF(self) -> None:
 ''')
 
     def testPDF(self) -> None:
+        texifier = PandocTexifier('pandoc')
         with DirectFetcher(TIMEOUT) as f:
             with open("test.tex", 'wb') as out:
                 paperdoorknob.process(
         with DirectFetcher(TIMEOUT) as f:
             with open("test.tex", 'wb') as out:
                 paperdoorknob.process(
-                    f"http://localhost:{self._port}", f, out, 'pandoc')
+                    f"http://localhost:{self._port}", f, texifier, out)
             subprocess.run(['pdflatex', 'test.tex'],
                            stdin=subprocess.DEVNULL, check=True)
 
             subprocess.run(['pdflatex', 'test.tex'],
                            stdin=subprocess.DEVNULL, check=True)
 
index 27980a1131a6dc2b619b4a509589ef4987ba0981..6464879c1ece96c2dc0350829eb7ba4a7b2ba091 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -10,6 +10,7 @@ setup(
     py_modules=[
         'fetch',
         'paperdoorknob',
     py_modules=[
         'fetch',
         'paperdoorknob',
+        'texify',
     ],
     license="GPL-3.0",
     entry_points={'console_scripts': ['paperdoorknob = paperdoorknob:main']},
     ],
     license="GPL-3.0",
     entry_points={'console_scripts': ['paperdoorknob = paperdoorknob:main']},
diff --git a/texify.py b/texify.py
new file mode 100644 (file)
index 0000000..4fb1557
--- /dev/null
+++ b/texify.py
@@ -0,0 +1,29 @@
+# paperdoorknob: Print glowfic
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, version 3.
+
+
+from abc import ABC, abstractmethod
+import subprocess
+
+from bs4.element import Tag
+
+
+class Texifier(ABC):
+    @abstractmethod
+    def texify(self, html: Tag) -> bytes:
+        raise NotImplementedError()
+
+
+class PandocTexifier(Texifier):
+
+    def __init__(self, pandoc_path: str) -> None:
+        self._pandoc_path = pandoc_path
+
+    def texify(self, html: Tag) -> bytes:
+        return subprocess.run([self._pandoc_path, '--from=html', '--to=latex'],
+                              input=html.encode(),
+                              stdout=subprocess.PIPE,
+                              check=True).stdout