git.scottworley.com Git - paperdoorknob/commitdiff
texfilter to work around \emph nesting issue
author Scott Worley <scottworley@scottworley.com>
Fri, 29 Dec 2023 18:24:14 +0000 (10:24 -0800)
committer Scott Worley <scottworley@scottworley.com>
Fri, 29 Dec 2023 19:42:27 +0000 (11:42 -0800)
I don't know enough LaTeX to understand what the problem is, but this
makes it go away.

args.py
paperdoorknob.py
paperdoorknob_test.py
setup.py
spec.py
texfilter.py [new file with mode: 0644]
texfilter_test.py [new file with mode: 0644]

diff --git a/args.py b/args.py
index 8f77a887547b403d311ee0939a5a423b168aa88a..384e4487b76bfb918f7316dee8604276f92e1cc5 100644 (file)
--- a/args.py
+++ b/args.py
@@ -17,6 +17,7 @@ from domfilter import ApplyDOMFilters, DOMFilters
 from fetch import CachingFetcher
 from glowfic import BesideIconLayout, BelowIconLayout, Layout
 from htmlfilter import ApplyHTMLFilters, HTMLFilters
+from texfilter import ApplyTexFilters, TexFilters
 from images import DiskImageStore
 from spec import Spec
 from texify import PandocTexifier
@@ -59,6 +60,10 @@ See https://faculty.bard.edu/bloch/geometry.pdf for details
              '(eg: "%(default)s" produces %(default)s.tex, %(default)s.pdf, etc.)',
         default='book')
     parser.add_argument('--pandoc', help='Location of the pandoc executable')
+    parser.add_argument(
+        '--texfilters',
+        help='Which TeX filters to use (default: %(default)s)',
+        default=','.join(f[0] for f in TexFilters))
     parser.add_argument(
         '--timeout',
         help='How long to wait for HTTP requests, in seconds',
@@ -88,6 +93,7 @@ def spec_from_commandline_args() -> Iterator[Spec]:
                 DiskImageStore(args.out + '_images', fetcher),
                 lambda x: ApplyHTMLFilters(args.htmlfilters, x),
                 lambda x: ApplyDOMFilters(args.domfilters, x),
+                lambda x: ApplyTexFilters(args.texfilters, x),
                 layout,
                 args.geometry,
                 texout)
diff --git a/paperdoorknob.py b/paperdoorknob.py
index b5b7a44a433f2410d64fb12c021348fac25171b8..b364f4d63653d3fa486ca764bb60768967b60763 100644 (file)
--- a/paperdoorknob.py
+++ b/paperdoorknob.py
@@ -34,7 +34,7 @@ def process(spec: Spec) -> None:
     for r in chunkDOMs(html):
         spec.domfilter(r)
         chunk = makeChunk(r, spec.images)
-        spec.texout.write(spec.layout.renderChunk(chunk))
+        spec.texout.write(spec.texfilter(spec.layout.renderChunk(chunk)))
     spec.texout.write(b'\\end{document}\n')
 
 
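diff --git a/paperdoorknob_test.py b/paperdoorknob_test.py
index a608f8bf89ccfee8facdfd1912ccc12bbd83909c..6481803805cde8da5fd1b76ab9a3aa3e099b65f7 100644 (file)
--- a/paperdoorknob_test.py
+++ b/paperdoorknob_test.py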
@@ -42,6 +42,7 @@ class BaseTestProcess(ABC):
             FakeImageStore(),
             lambda x: x,
             lambda x: ApplyDOMFilters('NoEdit,NoFooter', x),
+            lambda x: x,
             ContentOnlyLayout(PandocTexifier('pandoc')),
             'margin=20mm',
             buf)
@@ -68,6 +69,7 @@ Pretty sure.
             FakeImageStore(),
             lambda x: x,
             lambda x: ApplyDOMFilters('NoEdit,NoFooter', x),
+            lambda x: x,
             ContentOnlyLayout(texifier),
             None,
             buf)
@@ -81,6 +83,7 @@ Pretty sure.
                 FakeImageStore(),
                 lambda x: x,
                 lambda x: ApplyDOMFilters('NoEdit,NoFooter', x),
+                lambda x: x,
                 BelowIconLayout(PandocTexifier('pandoc'), 20),
                 None,
                 out)
diff --git a/setup.py b/setup.py
index 5e748fd4622e4a68bdfeb4f4da6bdd89ade1194f..e21bc146cade8eaab386a6b7220ce7ea0122e1b2 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -18,6 +18,7 @@ setup(
         'images',
         'paperdoorknob',
         'spec',
+        'texfilter',
         'texify',
         'version',
     ],
diff --git a/spec.py b/spec.py
index 34e2e6b3593bf34a7ec773c20ac6a08de67d8797..77a6a127ceff5da3fa273268ddad11bdcb4583ad 100644 (file)
--- a/spec.py
+++ b/spec.py
@@ -24,6 +24,7 @@ class Spec:
     images: ImageStore
     htmlfilter: Callable[[bytes], bytes]
     domfilter: Callable[[Tag], None]
+    texfilter: Callable[[bytes], bytes]
     layout: Layout
     geometry: str | None
     texout: IO[bytes]
diff --git a/texfilter.py b/texfilter.py
new file mode 100644 (file)
index 0000000..8103c19
--- /dev/null
+++ b/texfilter.py
@@ -0,0 +1,32 @@
+# paperdoorknob: Print glowfic
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, version 3.
+
+import re
+
+from typing import Callable, List, Tuple
+
+
+TexFilters: List[Tuple[str, Callable[[bytes], bytes]]] = [
+    # Work around `Extra }, or forgotten \endgroup.`
+    ("FixBareEmph", lambda x: re.sub(
+        b'(^|\n)(\\\\emph{)', b'\\1\\\\hspace{0pt}\\2', x)),
+]
+
+
+class TexFilterError(Exception):
+    pass
+
+
+def ApplyTexFilters(filter_list: str, data: bytes) -> bytes:
+    for filter_name in filter_list.split(','):
+        filters = [f for (name, f) in TexFilters if name == filter_name]
+        if len(filters) == 0:
+            raise TexFilterError(f"Unknown Tex filter: {filter_name}")
+        if len(filters) > 1:
+            raise TexFilterError(
+                f"Multiple Tex filters with the same name!?: {filter_name}")
+        data = filters[0](data)
+    return data
diff --git a/texfilter_test.py b/texfilter_test.py
new file mode 100644 (file)
index 0000000..8b603a3
--- /dev/null
+++ b/texfilter_test.py
@@ -0,0 +1,24 @@
+# paperdoorknob: Print glowfic
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, version 3.
+
+
+import unittest
+from texfilter import ApplyTexFilters
+
+
+class TestTexFilters(unittest.TestCase):
+
+    def testStripNBSP(self) -> None:
+        self.assertEqual(
+            ApplyTexFilters("FixBareEmph", b'\\emph{Yes?}'),
+            b'\\hspace{0pt}\\emph{Yes?}')
+        self.assertEqual(
+            ApplyTexFilters("FixBareEmph", b'Reassuring.\n\n\\emph{Yes?}'),
+            b'Reassuring.\n\n\\hspace{0pt}\\emph{Yes?}')
+
+
+if __name__ == '__main__':
+    unittest.main()