From 131deef1eba442d6e05c2ff6c6f7669b4b9e2b24 Mon Sep 17 00:00:00 2001 From: Scott Worley Date: Fri, 29 Dec 2023 10:24:14 -0800 Subject: [PATCH] texfilter to work around \emph nesting issue I don't know enough LaTeX to understand what the problem is, but this makes it go away. --- args.py | 6 ++++++ paperdoorknob.py | 2 +- paperdoorknob_test.py | 3 +++ setup.py | 1 + spec.py | 1 + texfilter.py | 32 ++++++++++++++++++++++++++++++++ texfilter_test.py | 24 ++++++++++++++++++++++++ 7 files changed, 68 insertions(+), 1 deletion(-) create mode 100644 texfilter.py create mode 100644 texfilter_test.py diff --git a/args.py b/args.py index 8f77a88..384e448 100644 --- a/args.py +++ b/args.py @@ -17,6 +17,7 @@ from domfilter import ApplyDOMFilters, DOMFilters from fetch import CachingFetcher from glowfic import BesideIconLayout, BelowIconLayout, Layout from htmlfilter import ApplyHTMLFilters, HTMLFilters +from texfilter import ApplyTexFilters, TexFilters from images import DiskImageStore from spec import Spec from texify import PandocTexifier @@ -59,6 +60,10 @@ See https://faculty.bard.edu/bloch/geometry.pdf for details '(eg: "%(default)s" produces %(default)s.tex, %(default)s.pdf, etc.)', default='book') parser.add_argument('--pandoc', help='Location of the pandoc executable') + parser.add_argument( + '--texfilters', + help='Which TeX filters to use (default: %(default)s)', + default=','.join(f[0] for f in TexFilters)) parser.add_argument( '--timeout', help='How long to wait for HTTP requests, in seconds', @@ -88,6 +93,7 @@ def spec_from_commandline_args() -> Iterator[Spec]: DiskImageStore(args.out + '_images', fetcher), lambda x: ApplyHTMLFilters(args.htmlfilters, x), lambda x: ApplyDOMFilters(args.domfilters, x), + lambda x: ApplyTexFilters(args.texfilters, x), layout, args.geometry, texout) diff --git a/paperdoorknob.py b/paperdoorknob.py index b5b7a44..b364f4d 100644 --- a/paperdoorknob.py +++ b/paperdoorknob.py @@ -34,7 +34,7 @@ def process(spec: Spec) -> None: for r in chunkDOMs(html): spec.domfilter(r) chunk = makeChunk(r, spec.images) - spec.texout.write(spec.layout.renderChunk(chunk)) + spec.texout.write(spec.texfilter(spec.layout.renderChunk(chunk))) spec.texout.write(b'\\end{document}\n') diff --git a/paperdoorknob_test.py b/paperdoorknob_test.py index a608f8b..6481803 100644 --- a/paperdoorknob_test.py +++ b/paperdoorknob_test.py @@ -42,6 +42,7 @@ class BaseTestProcess(ABC): FakeImageStore(), lambda x: x, lambda x: ApplyDOMFilters('NoEdit,NoFooter', x), + lambda x: x, ContentOnlyLayout(PandocTexifier('pandoc')), 'margin=20mm', buf) @@ -68,6 +69,7 @@ Pretty sure. FakeImageStore(), lambda x: x, lambda x: ApplyDOMFilters('NoEdit,NoFooter', x), + lambda x: x, ContentOnlyLayout(texifier), None, buf) @@ -81,6 +83,7 @@ Pretty sure. FakeImageStore(), lambda x: x, lambda x: ApplyDOMFilters('NoEdit,NoFooter', x), + lambda x: x, BelowIconLayout(PandocTexifier('pandoc'), 20), None, out) diff --git a/setup.py b/setup.py index 5e748fd..e21bc14 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,7 @@ setup( 'images', 'paperdoorknob', 'spec', + 'texfilter', 'texify', 'version', ], diff --git a/spec.py b/spec.py index 34e2e6b..77a6a12 100644 --- a/spec.py +++ b/spec.py @@ -24,6 +24,7 @@ class Spec: images: ImageStore htmlfilter: Callable[[bytes], bytes] domfilter: Callable[[Tag], None] + texfilter: Callable[[bytes], bytes] layout: Layout geometry: str | None texout: IO[bytes] diff --git a/texfilter.py b/texfilter.py new file mode 100644 index 0000000..8103c19 --- /dev/null +++ b/texfilter.py @@ -0,0 +1,32 @@ +# paperdoorknob: Print glowfic +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, version 3. + +import re + +from typing import Callable, List, Tuple + + +TexFilters: List[Tuple[str, Callable[[bytes], bytes]]] = [ + # Work around `Extra }, or forgotten \endgroup.` + ("FixBareEmph", lambda x: re.sub( + b'(^|\n)(\\\\emph{)', b'\\1\\\\hspace{0pt}\\2', x)), +] + + +class TexFilterError(Exception): + pass + + +def ApplyTexFilters(filter_list: str, data: bytes) -> bytes: + for filter_name in filter_list.split(','): + filters = [f for (name, f) in TexFilters if name == filter_name] + if len(filters) == 0: + raise TexFilterError(f"Unknown Tex filter: {filter_name}") + if len(filters) > 1: + raise TexFilterError( + f"Multiple Tex filters with the same name!?: {filter_name}") + data = filters[0](data) + return data diff --git a/texfilter_test.py b/texfilter_test.py new file mode 100644 index 0000000..8b603a3 --- /dev/null +++ b/texfilter_test.py @@ -0,0 +1,24 @@ +# paperdoorknob: Print glowfic +# +# This program is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation, version 3. + + +import unittest +from texfilter import ApplyTexFilters + + +class TestTexFilters(unittest.TestCase): + + def testStripNBSP(self) -> None: + self.assertEqual( + ApplyTexFilters("FixBareEmph", b'\\emph{Yes?}'), + b'\\hspace{0pt}\\emph{Yes?}') + self.assertEqual( + ApplyTexFilters("FixBareEmph", b'Reassuring.\n\n\\emph{Yes?}'), + b'Reassuring.\n\n\\hspace{0pt}\\emph{Yes?}') + + +if __name__ == '__main__': + unittest.main() -- 2.44.1