Support multiple input files with -o being a directory
This change makes it possible for scour to consume multiple input
files in one command invocation. E.g.
$ scour file1.svg file2.svgz ... output-directory
# xargs friendly variant
$ scour -o output-directory file1.svg file2.svgz ...
This avoids most of the "startup" overhead in python and scour when
many files are being processed. On about a 100 of (already scour'ed)
gnuplot svg graphs, this change provides an almost 40% speed up compared
to a shell alternative:
# Original shell pipeline (~29s)
# Note; for bash, rewriting this without the "basename"-call does
# not seem to improve performance considerably.
$ for FILE in input/[01]* ; do \
python3 -m scour.scour "$FILE" output/"$(basename "$FILE")" > /dev/null ; \
done
# With this patch (~16s)
$ python3 -m scour.scour input/[01]* output > /dev/null
Signed-off-by: Niels Thykier <niels@thykier.net>
This commit is contained in:
parent
82ce83acab
commit
c42dc6b793
2 changed files with 40 additions and 20 deletions
|
|
@ -3630,10 +3630,12 @@ class HeaderedFormatter(optparse.IndentedHelpFormatter):
|
|||
# GZ: would prefer this to be in a function or class scope, but tests etc need
|
||||
# access to the defaults anyway
|
||||
_options_parser = optparse.OptionParser(
|
||||
usage="%prog [INPUT.SVG [OUTPUT.SVG]] [OPTIONS]",
|
||||
usage="%prog [INPUT.SVG [[... INPUT.SVG] OUTPUT]] [OPTIONS]",
|
||||
description=("If the input/output files are not specified, stdin/stdout are used. "
|
||||
"If the input/output files are specified with a svgz extension, "
|
||||
"then compressed SVG is assumed."),
|
||||
"then compressed SVG is assumed. Multiple input files can be specified "
|
||||
"when output is a directory. The ouput files will be the basename of "
|
||||
"the input files."),
|
||||
formatter=HeaderedFormatter(max_help_position=33),
|
||||
version=VER)
|
||||
|
||||
|
|
@ -3648,11 +3650,11 @@ _options_parser.add_option("-v", "--verbose",
|
|||
action="store_true", dest="verbose", default=False,
|
||||
help="verbose output (statistics, etc.)")
|
||||
_options_parser.add_option("-i",
|
||||
action="store", dest="infilename", metavar="INPUT.SVG",
|
||||
help="alternative way to specify input filename")
|
||||
action="append", dest="infilenames", metavar="INPUT.SVG",
|
||||
help="alternative way to specify input filenames")
|
||||
_options_parser.add_option("-o",
|
||||
action="store", dest="outfilename", metavar="OUTPUT.SVG",
|
||||
help="alternative way to specify output filename")
|
||||
action="store", dest="outfilename", metavar="OUTPUT",
|
||||
help="alternative way to specify output (either a file or a directory)")
|
||||
|
||||
_option_group_optimization = optparse.OptionGroup(_options_parser, "Optimization")
|
||||
_option_group_optimization.add_option("--set-precision",
|
||||
|
|
@ -3766,10 +3768,11 @@ def parse_args(args=None, ignore_additional_args=False):
|
|||
options, rargs = _options_parser.parse_args(args)
|
||||
|
||||
if rargs:
|
||||
if not options.infilename:
|
||||
options.infilename = rargs.pop(0)
|
||||
if not options.outfilename and rargs:
|
||||
options.outfilename = rargs.pop(0)
|
||||
if not options.outfilename and len(rargs) > 1:
|
||||
options.outfilename = rargs.pop()
|
||||
if not options.infilenames:
|
||||
options.infilenames = rargs
|
||||
rargs = []
|
||||
if not ignore_additional_args and rargs:
|
||||
_options_parser.error("Additional arguments not handled: %r, see --help" % rargs)
|
||||
if options.digits < 1:
|
||||
|
|
@ -3782,8 +3785,15 @@ def parse_args(args=None, ignore_additional_args=False):
|
|||
_options_parser.error("Invalid value for --indent, see --help")
|
||||
if options.indent_depth < 0:
|
||||
_options_parser.error("Value for --nindent should be positive (or zero), see --help")
|
||||
if options.infilename and options.outfilename and options.infilename == options.outfilename:
|
||||
_options_parser.error("Input filename is the same as output filename")
|
||||
if options.infilenames:
|
||||
if len(options.infilenames) > 1:
|
||||
if not options.outfilename or not os.path.isdir(options.outfilename):
|
||||
_options_parser.error("Multiple input files requires an directory as output (-o)")
|
||||
elif len(options.infilenames) == 0 and options.outfilename and options.infilenames[0] == options.outfilename:
|
||||
_options_parser.error("Input filename is the same as output filename")
|
||||
else:
|
||||
if options.outfilename and os.path.isdir(options.outfilename):
|
||||
_options_parser.error("Cannot use a directory as output when input is stdin")
|
||||
|
||||
return options
|
||||
|
||||
|
|
@ -3811,12 +3821,12 @@ def maybe_gziped_file(filename, mode="r"):
|
|||
return open(filename, mode)
|
||||
|
||||
|
||||
def getInOut(options):
|
||||
def getInOut(input_file, options):
|
||||
references_relative_to = None
|
||||
if options.infilename:
|
||||
infile = maybe_gziped_file(options.infilename, "rb")
|
||||
if input_file is not None:
|
||||
infile = maybe_gziped_file(input_file, "rb")
|
||||
# GZ: could catch a raised IOError here and report
|
||||
references_relative_to = os.path.dirname(options.infilename)
|
||||
references_relative_to = os.path.dirname(input_file)
|
||||
else:
|
||||
# GZ: could sniff for gzip compression here
|
||||
#
|
||||
|
|
@ -3830,7 +3840,11 @@ def getInOut(options):
|
|||
_options_parser.error("No input file specified, see --help for detailed usage information")
|
||||
|
||||
if options.outfilename:
|
||||
outfile = maybe_gziped_file(options.outfilename, "wb")
|
||||
dest = options.outfilename
|
||||
if os.path.isdir(dest):
|
||||
assert input_file is not None
|
||||
dest = os.path.join(dest, os.path.basename(input_file))
|
||||
outfile = maybe_gziped_file(dest, "wb")
|
||||
else:
|
||||
# open the binary buffer of stdout as the output is already encoded
|
||||
try:
|
||||
|
|
@ -3901,8 +3915,14 @@ def start(options, input, output, references_relative_to=None):
|
|||
|
||||
def run():
|
||||
options = parse_args()
|
||||
(input, input_relative_to, output) = getInOut(options)
|
||||
start(options, input, output, input_relative_to)
|
||||
input_files = options.infilenames if options.infilenames is not None else []
|
||||
if input_files and input_files[0] is not None:
|
||||
for filename in input_files:
|
||||
(input, input_relative_to, output) = getInOut(filename, options)
|
||||
start(options, input, output, input_relative_to)
|
||||
else:
|
||||
(input, input_relative_to, output) = getInOut(None, options)
|
||||
start(options, input, output, input_relative_to)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
|||
|
|
@ -2358,7 +2358,7 @@ class DoNotStripXmlSpaceAttribute(unittest.TestCase):
|
|||
|
||||
class CommandLineUsage(unittest.TestCase):
|
||||
|
||||
USAGE_STRING = "Usage: scour [INPUT.SVG [OUTPUT.SVG]] [OPTIONS]"
|
||||
USAGE_STRING = "Usage: scour [INPUT.SVG [[... INPUT.SVG] OUTPUT]] [OPTIONS]"
|
||||
MINIMAL_SVG = '<?xml version="1.0" encoding="UTF-8"?>\n' \
|
||||
'<svg xmlns="http://www.w3.org/2000/svg"/>\n'
|
||||
TEMP_SVG_FILE = 'testscour_temp.svg'
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue