git.scottworley.com Git - paperdoorknob/blobdiff - args.py
More structure and tests around splitting the page into chunks' DOMs.
[paperdoorknob] / args.py
diff --git a/args.py b/args.py
index 1931d4271f242dc918287d28a7bedf2e4ee71f81..d90de8a78b7e32207c66fe6ee19d7939de634483 100644 (file)
--- a/args.py
+++ b/args.py
@@ -13,8 +13,10 @@ from typing import Iterator
 
 from xdg_base_dirs import xdg_cache_home
 
+from domfilter import ApplyDOMFilters, DOMFilters
 from fetch import CachingFetcher
 from htmlfilter import ApplyHTMLFilters, HTMLFilters
+from images import ImageStore
 from spec import Spec
 from texify import PandocTexifier
 
@@ -26,6 +28,17 @@ def _command_line_parser() -> ArgumentParser:
         metavar='PATH',
         help='Where to keep the http cache (instead of %(default)s)',
         default=os.path.join(xdg_cache_home(), "paperdoorknob"))
+    parser.add_argument(
+        '--domfilters',
+        help='Which DOM filters to use (default: %(default)s)',
+        default=','.join(f[0] for f in DOMFilters))
+    parser.add_argument(
+        '--geometry',
+        help='''Page size and margin control
+See https://faculty.bard.edu/bloch/geometry.pdf for details
+(default: %(default)s)''',
+        default='paperwidth=5.5in,paperheight=8.5in,nohead,' +
+        'tmargin=15mm,hmargin=15mm,bmargin=17mm,foot=4mm')
     parser.add_argument(
         '--htmlfilters',
         help='Which HTML filters to use (default: %(default)s)',
@@ -40,7 +53,9 @@ def _command_line_parser() -> ArgumentParser:
         '--timeout',
         help='How long to wait for HTTP requests, in seconds',
         default=30)
-    parser.add_argument('url', help='URL to retrieve')
+    parser.add_argument(
+        'url',
+        help='URL to retrieve (example: https://www.projectlawful.com/posts/4582 )')
     return parser
 
 
@@ -52,6 +67,9 @@ def spec_from_commandline_args() -> Iterator[Spec]:
             yield Spec(
                 args.url,
                 fetcher,
+                ImageStore(args.out + '_images', fetcher),
                 lambda x: ApplyHTMLFilters(args.htmlfilters, x),
+                lambda x: ApplyDOMFilters(args.domfilters, x),
                 PandocTexifier(args.pandoc or 'pandoc'),
+                args.geometry,
                 texout)