git.scottworley.com Git - paperdoorknob/commitdiff
texfilter to work around \emph nesting issue
author Scott Worley <scottworley@scottworley.com>
Fri, 29 Dec 2023 18:24:14 +0000 (10:24 -0800)
committer Scott Worley <scottworley@scottworley.com>
Fri, 29 Dec 2023 19:42:27 +0000 (11:42 -0800)
I don't know enough LaTeX to understand what the problem is, but this
makes it go away.

args.py
paperdoorknob.py
paperdoorknob_test.py
setup.py
spec.py
texfilter.py [new file with mode: 0644]
texfilter_test.py [new file with mode: 0644]

diff --git a/args.py b/args.py
index 8f77a887547b403d311ee0939a5a423b168aa88a..384e4487b76bfb918f7316dee8604276f92e1cc5 100644 (file)
--- a/args.py
+++ b/args.py
@@ -17,6 +17,7 @@ from domfilter import ApplyDOMFilters, DOMFilters
 from fetch import CachingFetcher
 from glowfic import BesideIconLayout, BelowIconLayout, Layout
 from htmlfilter import ApplyHTMLFilters, HTMLFilters
+from texfilter import ApplyTexFilters, TexFilters
 from images import DiskImageStore
 from spec import Spec
 from texify import PandocTexifier
@@ -59,6 +60,10 @@ See https://faculty.bard.edu/bloch/geometry.pdf for details
              '(eg: "%(default)s" produces %(default)s.tex, %(default)s.pdf, etc.)',
         default='book')
     parser.add_argument('--pandoc', help='Location of the pandoc executable')
+    parser.add_argument(
+        '--texfilters',
+        help='Which TeX filters to use (default: %(default)s)',
+        default=','.join(f[0] for f in TexFilters))
     parser.add_argument(
         '--timeout',
         help='How long to wait for HTTP requests, in seconds',
@@ -88,6 +93,7 @@ def spec_from_commandline_args() -> Iterator[Spec]:
                 DiskImageStore(args.out + '_images', fetcher),
                 lambda x: ApplyHTMLFilters(args.htmlfilters, x),
                 lambda x: ApplyDOMFilters(args.domfilters, x),
+                lambda x: ApplyTexFilters(args.texfilters, x),
                 layout,
                 args.geometry,
                 texout)
diff --git a/paperdoorknob.py b/paperdoorknob.py
index b5b7a44a433f2410d64fb12c021348fac25171b8..b364f4d63653d3fa486ca764bb60768967b60763 100644 (file)
--- a/paperdoorknob.py
+++ b/paperdoorknob.py
@@ -34,7 +34,7 @@ def process(spec: Spec) -> None:
     for r in chunkDOMs(html):
         spec.domfilter(r)
         chunk = makeChunk(r, spec.images)
-        spec.texout.write(spec.layout.renderChunk(chunk))
+        spec.texout.write(spec.texfilter(spec.layout.renderChunk(chunk)))
     spec.texout.write(b'\\end{document}\n')
 
 
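diff --git a/paperdoorknob_test.py b/paperdoorknob_test.py
index a608f8bf89ccfee8facdfd1912ccc12bbd83909c..6481803805cde8da5fd1b76ab9a3aa3e099b65f7 100644 (file)
--- a/paperdoorknob_test.py
+++ b/paperdoorknob_test.py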
@@ -42,6 +42,7 @@ class BaseTestProcess(ABC):
             FakeImageStore(),
             lambda x: x,
             lambda x: ApplyDOMFilters('NoEdit,NoFooter', x),
+            lambda x: x,
             ContentOnlyLayout(PandocTexifier('pandoc')),
             'margin=20mm',
             buf)
@@ -68,6 +69,7 @@ Pretty sure.
             FakeImageStore(),
             lambda x: x,
             lambda x: ApplyDOMFilters('NoEdit,NoFooter', x),
+            lambda x: x,
             ContentOnlyLayout(texifier),
             None,
             buf)
@@ -81,6 +83,7 @@ Pretty sure.
                 FakeImageStore(),
                 lambda x: x,
                 lambda x: ApplyDOMFilters('NoEdit,NoFooter', x),
+                lambda x: x,
                 BelowIconLayout(PandocTexifier('pandoc'), 20),
                 None,
                 out)
diff --git a/setup.py b/setup.py
index 5e748fd4622e4a68bdfeb4f4da6bdd89ade1194f..e21bc146cade8eaab386a6b7220ce7ea0122e1b2 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -18,6 +18,7 @@ setup(
         'images',
         'paperdoorknob',
         'spec',
+        'texfilter',
         'texify',
         'version',
     ],
diff --git a/spec.py b/spec.py
index 34e2e6b3593bf34a7ec773c20ac6a08de67d8797..77a6a127ceff5da3fa273268ddad11bdcb4583ad 100644 (file)
--- a/spec.py
+++ b/spec.py
@@ -24,6 +24,7 @@ class Spec:
     images: ImageStore
     htmlfilter: Callable[[bytes], bytes]
     domfilter: Callable[[Tag], None]
+    texfilter: Callable[[bytes], bytes]
     layout: Layout
     geometry: str | None
     texout: IO[bytes]
diff --git a/texfilter.py b/texfilter.py
new file mode 100644 (file)
index 0000000..8103c19
--- /dev/null
+++ b/texfilter.py
@@ -0,0 +1,32 @@
+# paperdoorknob: Print glowfic
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, version 3.
+
+import re
+
+from typing import Callable, List, Tuple
+
+
+TexFilters: List[Tuple[str, Callable[[bytes], bytes]]] = [
+    # Work around `Extra }, or forgotten \endgroup.`
+    ("FixBareEmph", lambda x: re.sub(
+        b'(^|\n)(\\\\emph{)', b'\\1\\\\hspace{0pt}\\2', x)),
+]
+
+
+class TexFilterError(Exception):
+    pass
+
+
+def ApplyTexFilters(filter_list: str, data: bytes) -> bytes:
+    for filter_name in filter_list.split(','):
+        filters = [f for (name, f) in TexFilters if name == filter_name]
+        if len(filters) == 0:
+            raise TexFilterError(f"Unknown Tex filter: {filter_name}")
+        if len(filters) > 1:
+            raise TexFilterError(
+                f"Multiple Tex filters with the same name!?: {filter_name}")
+        data = filters[0](data)
+    return data
diff --git a/texfilter_test.py b/texfilter_test.py
new file mode 100644 (file)
index 0000000..8b603a3
--- /dev/null
+++ b/texfilter_test.py
@@ -0,0 +1,24 @@
+# paperdoorknob: Print glowfic
+#
+# This program is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation, version 3.
+
+
+import unittest
+from texfilter import ApplyTexFilters
+
+
+class TestTexFilters(unittest.TestCase):
+
+    def testStripNBSP(self) -> None:
+        self.assertEqual(
+            ApplyTexFilters("FixBareEmph", b'\\emph{Yes?}'),
+            b'\\hspace{0pt}\\emph{Yes?}')
+        self.assertEqual(
+            ApplyTexFilters("FixBareEmph", b'Reassuring.\n\n\\emph{Yes?}'),
+            b'Reassuring.\n\n\\hspace{0pt}\\emph{Yes?}')
+
+
+if __name__ == '__main__':
+    unittest.main()