from xdg_base_dirs import xdg_cache_home
+from domfilter import ApplyDOMFilters, DOMFilters
from fetch import CachingFetcher
+from htmlfilter import ApplyHTMLFilters, HTMLFilters
from spec import Spec
from texify import PandocTexifier
metavar='PATH',
help='Where to keep the http cache (instead of %(default)s)',
default=os.path.join(xdg_cache_home(), "paperdoorknob"))
+ parser.add_argument(
+ '--domfilters',
+ help='Which DOM filters to use (default: %(default)s)',
+ default=','.join(f[0] for f in DOMFilters))
+ parser.add_argument(
+ '--htmlfilters',
+ help='Which HTML filters to use (default: %(default)s)',
+ default=','.join(f[0] for f in HTMLFilters))
parser.add_argument(
'--out',
help='The filename stem at which to write output ' +
args = _command_line_parser().parse_args()
with CachingFetcher(args.cache_path, args.timeout) as fetcher:
with open(args.out + '.tex', 'wb') as texout:
- yield Spec(args.url, fetcher, PandocTexifier(args.pandoc or 'pandoc'), texout)
+ yield Spec(
+ args.url,
+ fetcher,
+ lambda x: ApplyHTMLFilters(args.htmlfilters, x),
+ lambda x: ApplyDOMFilters(args.domfilters, x),
+ PandocTexifier(args.pandoc or 'pandoc'),
+ texout)