X-Git-Url: http://git.scottworley.com/paperdoorknob/blobdiff_plain/23f3187945d1cf4f3a9cdc43462be2ca39e7023a..91fe9916122adaee2cf1695040f906d709e1aa1c:/args.py?ds=sidebyside diff --git a/args.py b/args.py index 2a03836..f343a84 100644 --- a/args.py +++ b/args.py @@ -13,7 +13,9 @@ from typing import Iterator from xdg_base_dirs import xdg_cache_home +from domfilter import ApplyDOMFilters, DOMFilters from fetch import CachingFetcher +from htmlfilter import ApplyHTMLFilters, HTMLFilters from spec import Spec from texify import PandocTexifier @@ -25,6 +27,21 @@ def _command_line_parser() -> ArgumentParser: metavar='PATH', help='Where to keep the http cache (instead of %(default)s)', default=os.path.join(xdg_cache_home(), "paperdoorknob")) + parser.add_argument( + '--domfilters', + help='Which DOM filters to use (default: %(default)s)', + default=','.join(f[0] for f in DOMFilters)) + parser.add_argument( + '--geometry', + help='''Page size and margin control +See https://faculty.bard.edu/bloch/geometry.pdf for details +(default: %(default)s)''', + default='paperwidth=5.5in,paperheight=8.5in,nohead,' + + 'tmargin=15mm,hmargin=15mm,bmargin=17mm,foot=4mm') + parser.add_argument( + '--htmlfilters', + help='Which HTML filters to use (default: %(default)s)', + default=','.join(f[0] for f in HTMLFilters)) parser.add_argument( '--out', help='The filename stem at which to write output ' + @@ -35,7 +52,9 @@ def _command_line_parser() -> ArgumentParser: '--timeout', help='How long to wait for HTTP requests, in seconds', default=30) - parser.add_argument('url', help='URL to retrieve') + parser.add_argument( + 'url', + help='URL to retrieve (example: https://www.projectlawful.com/posts/4582 )') return parser @@ -44,4 +63,11 @@ def spec_from_commandline_args() -> Iterator[Spec]: args = _command_line_parser().parse_args() with CachingFetcher(args.cache_path, args.timeout) as fetcher: with open(args.out + '.tex', 'wb') as texout: - yield Spec(args.url, fetcher, PandocTexifier(args.pandoc or 'pandoc'), texout) + yield Spec( + args.url, + fetcher, + lambda x: ApplyHTMLFilters(args.htmlfilters, x), + lambda x: ApplyDOMFilters(args.domfilters, x), + PandocTexifier(args.pandoc or 'pandoc'), + args.geometry, + texout)