from xdg_base_dirs import xdg_cache_home
+from domfilter import ApplyDOMFilters, DOMFilters
from fetch import CachingFetcher
+from htmlfilter import ApplyHTMLFilters, HTMLFilters
+from images import DiskImageStore
from spec import Spec
from texify import PandocTexifier
metavar='PATH',
help='Where to keep the http cache (instead of %(default)s)',
default=os.path.join(xdg_cache_home(), "paperdoorknob"))
+ parser.add_argument(
+ '--domfilters',
+ help='Which DOM filters to use (default: %(default)s)',
+ default=','.join(f[0] for f in DOMFilters))
+ parser.add_argument(
+ '--geometry',
+ help='''Page size and margin control
+See https://faculty.bard.edu/bloch/geometry.pdf for details
+(default: %(default)s)''',
+ default='paperwidth=5.5in,paperheight=8.5in,nohead,' +
+ 'tmargin=15mm,hmargin=15mm,bmargin=17mm,foot=4mm')
+ parser.add_argument(
+ '--htmlfilters',
+ help='Which HTML filters to use (default: %(default)s)',
+ default=','.join(f[0] for f in HTMLFilters))
parser.add_argument(
'--out',
help='The filename stem at which to write output ' +
'--timeout',
help='How long to wait for HTTP requests, in seconds',
default=30)
- parser.add_argument('url', help='URL to retrieve')
+ parser.add_argument(
+ 'url',
+ help='URL to retrieve (example: https://www.projectlawful.com/posts/4582 )')
return parser
args = _command_line_parser().parse_args()
with CachingFetcher(args.cache_path, args.timeout) as fetcher:
with open(args.out + '.tex', 'wb') as texout:
- yield Spec(args.url, fetcher, PandocTexifier(args.pandoc or 'pandoc'), texout)
+ yield Spec(
+ args.url,
+ fetcher,
+ DiskImageStore(args.out + '_images', fetcher),
+ lambda x: ApplyHTMLFilters(args.htmlfilters, x),
+ lambda x: ApplyDOMFilters(args.domfilters, x),
+ PandocTexifier(args.pandoc or 'pandoc'),
+ args.geometry,
+ texout)