Rewrite to use optparse for user interface, picking up a few minor bug fixes and improvements in the process, but trying to keep otherwise unchanged.
This commit is contained in:
parent
4bbb5923b3
commit
d8ffea56e5
4 changed files with 112 additions and 118 deletions
|
|
@ -8,3 +8,4 @@ Thanks to the following contributors to scour:
|
|||
* Martin:
|
||||
- better methods of handling string-to-float conversions in Python
|
||||
- document functions in the traditional Python way
|
||||
- rewrite option parsing code
|
||||
|
|
|
|||
|
|
@ -3,6 +3,6 @@ mkdir $1
|
|||
for FILE in `ls fulltests`
|
||||
do
|
||||
echo Doing $FILE:
|
||||
./scour.py -i fulltests/$FILE -o $1/$FILE >> $1/report.txt
|
||||
./scour.py -i fulltests/$FILE -o $1/$FILE 2>> $1/report.txt
|
||||
done
|
||||
|
||||
214
scour.py
214
scour.py
|
|
@ -57,14 +57,11 @@ import xml.dom.minidom
|
|||
import re
|
||||
import math
|
||||
import base64
|
||||
import os.path
|
||||
import urllib
|
||||
from svg_regex import svg_parser
|
||||
from decimal import *
|
||||
import gzip
|
||||
|
||||
# set precision to 5 decimal places by default
|
||||
getcontext().prec = 5
|
||||
import optparse
|
||||
|
||||
APP = 'scour'
|
||||
VER = '0.14'
|
||||
|
|
@ -829,7 +826,7 @@ def repairStyle(node, options):
|
|||
|
||||
# now if any of the properties match known SVG attributes we prefer attributes
|
||||
# over style so emit them and remove them from the style map
|
||||
if not '--disable-style-to-xml' in options:
|
||||
if options.style_to_xml:
|
||||
for propName in styleMap.keys() :
|
||||
if propName in svgAttributes :
|
||||
node.setAttribute(propName, styleMap[propName])
|
||||
|
|
@ -1313,7 +1310,10 @@ def properlySizeDoc(docElement):
|
|||
# this is the main method
|
||||
# input is a string representation of the input XML
|
||||
# returns a string representation of the output XML
|
||||
def scourString(in_string, options=[]):
|
||||
def scourString(in_string, options=None):
|
||||
if options is None:
|
||||
options = _options_parser.get_default_values()
|
||||
getcontext().prec = options.digits
|
||||
global numAttrsRemoved
|
||||
global numStylePropsFixed
|
||||
global numElemsRemoved
|
||||
|
|
@ -1343,7 +1343,7 @@ def scourString(in_string, options=[]):
|
|||
numStylePropsFixed = repairStyle(doc.documentElement, options)
|
||||
|
||||
# convert colors to #RRGGBB format
|
||||
if not '--disable-simplify-colors' in options:
|
||||
if options.simple_colors:
|
||||
numBytesSavedInColors = convertColors(doc.documentElement)
|
||||
|
||||
# remove empty defs, metadata, g
|
||||
|
|
@ -1366,14 +1366,14 @@ def scourString(in_string, options=[]):
|
|||
while removeUnreferencedElements(doc) > 0:
|
||||
pass
|
||||
|
||||
if '--enable-id-stripping' in options:
|
||||
if options.strip_ids:
|
||||
bContinueLooping = True
|
||||
while bContinueLooping:
|
||||
identifiedElements = findElementsWithId(doc.documentElement)
|
||||
referencedIDs = findReferencedElements(doc.documentElement)
|
||||
bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0)
|
||||
|
||||
if not '--disable-group-collapsing' in options:
|
||||
if options.group_collapse:
|
||||
while removeNestedGroups(doc.documentElement) > 0:
|
||||
pass
|
||||
|
||||
|
|
@ -1417,133 +1417,123 @@ def scourString(in_string, options=[]):
|
|||
# used mostly by unit tests
|
||||
# input is a filename
|
||||
# returns the minidom doc representation of the SVG
|
||||
def scourXmlFile(filename, options=[]):
|
||||
def scourXmlFile(filename, options=None):
|
||||
in_string = open(filename).read()
|
||||
# print 'IN=',in_string
|
||||
out_string = scourString(in_string, options)
|
||||
# print 'OUT=',out_string
|
||||
return xml.dom.minidom.parseString(out_string.encode('utf-8'))
|
||||
|
||||
def printHeader():
|
||||
print APP , VER
|
||||
print COPYRIGHT
|
||||
# GZ: Seems most other commandline tools don't do this, is it really wanted?
|
||||
class HeaderedFormatter(optparse.IndentedHelpFormatter):
|
||||
"""
|
||||
Show application name, version number, and copyright statement
|
||||
above usage information.
|
||||
"""
|
||||
def format_usage(self, usage):
|
||||
return "%s %s\n%s\n%s" % (APP, VER, COPYRIGHT,
|
||||
optparse.IndentedHelpFormatter.format_usage(self, usage))
|
||||
|
||||
def printSyntaxAndQuit():
|
||||
printHeader()
|
||||
print 'usage: scour.py [-i input.svg] [-o output.svg] [OPTIONS]\n'
|
||||
print 'If the input/output files are specified with a svgz extension, then compressed SVG is assumed.\n'
|
||||
print 'If the input file is not specified, stdin is used.'
|
||||
print 'If the output file is not specified, stdout is used.'
|
||||
print 'If an option is not available below that means it occurs automatically'
|
||||
print 'when scour is invoked. Available OPTIONS:\n'
|
||||
print ' --disable-simplify-colors : Scour will not convert all colors to #RRGGBB format'
|
||||
print ' --disable-style-to-xml : Scour will not convert style properties into XML attributes'
|
||||
print ' --disable-group-collapsing : Scour will not collapse <g> elements'
|
||||
print ' --enable-id-stripping : Scour will remove all un-referenced ID attributes'
|
||||
print ' --set-precision N : Scour will set the number of significant digits (default: 6)'
|
||||
print ''
|
||||
quit()
|
||||
# GZ: would prefer this to be in a function or class scope, but tests etc need
|
||||
# access to the defaults anyway
|
||||
_options_parser = optparse.OptionParser(
|
||||
usage="%prog [-i input.svg] [-o output.svg] [OPTIONS]",
|
||||
description=("If the input/output files are specified with a svgz"
|
||||
" extension, then compressed SVG is assumed. If the input file is not"
|
||||
" specified, stdin is used. If the output file is not specified, "
|
||||
" stdout is used. If an option is not available below that means it"
|
||||
" occurs automatically."),
|
||||
formatter=HeaderedFormatter(max_help_position=30),
|
||||
version=VER)
|
||||
|
||||
# returns a tuple with:
|
||||
# input stream, output stream, a list of options specified on the command-line,
|
||||
# input filename, and output filename
|
||||
def parseCLA():
|
||||
args = sys.argv[1:]
|
||||
_options_parser.add_option("--disable-simplify-colors",
|
||||
action="store_false", dest="simple_colors", default=True,
|
||||
help="won't convert all colors to #RRGGBB format")
|
||||
_options_parser.add_option("--disable-style-to-xml",
|
||||
action="store_false", dest="style_to_xml", default=True,
|
||||
help="won't convert styles into XML attributes")
|
||||
_options_parser.add_option("--disable-group-collapsing",
|
||||
action="store_false", dest="group_collapse", default=True,
|
||||
help="won't collapse <g> elements")
|
||||
_options_parser.add_option("--enable-id-stripping",
|
||||
action="store_true", dest="strip_ids", default=False,
|
||||
help="remove all un-referenced ID attributes")
|
||||
# GZ: this is confusing, most people will be thinking in terms of
|
||||
# decimal places, which is not what decimal precision is doing
|
||||
_options_parser.add_option("-p", "--set-precision",
|
||||
action="store", type=int, dest="digits", default=5,
|
||||
help="set number of significant digits (default: %default)")
|
||||
_options_parser.add_option("-i",
|
||||
action="store", dest="infilename", help=optparse.SUPPRESS_HELP)
|
||||
_options_parser.add_option("-o",
|
||||
action="store", dest="outfilename", help=optparse.SUPPRESS_HELP)
|
||||
|
||||
# by default the input and output are the standard streams
|
||||
inputfilename = ''
|
||||
outputfilename = ''
|
||||
input = sys.stdin
|
||||
output = sys.stdout
|
||||
options = []
|
||||
validOptions = [
|
||||
'--disable-simplify-colors',
|
||||
'--disable-style-to-xml',
|
||||
'--disable-group-collapsing',
|
||||
'--enable-id-stripping',
|
||||
'--set-precision',
|
||||
]
|
||||
def maybe_gziped_file(filename, mode="r"):
|
||||
if os.path.splitext(filename)[1].lower() in (".svgz", ".gz"):
|
||||
return gzip.GzipFile(filename, mode)
|
||||
return file(filename, mode)
|
||||
|
||||
i = 0
|
||||
while i < len(args):
|
||||
arg = args[i]
|
||||
i += 1
|
||||
if arg == '-i' :
|
||||
if i < len(args) :
|
||||
inputfilename = args[i]
|
||||
if args[i][-5:] == '.svgz':
|
||||
input = gzip.open(args[i], 'rb')
|
||||
else:
|
||||
input = open(args[i], 'r')
|
||||
i += 1
|
||||
continue
|
||||
else:
|
||||
printSyntaxAndQuit()
|
||||
elif arg == '-o' :
|
||||
if i < len(args) :
|
||||
outputfilename = args[i]
|
||||
if args[i][-5:] == '.svgz':
|
||||
output = gzip.open(args[i], 'wb')
|
||||
else:
|
||||
output = open(args[i], 'w')
|
||||
i += 1
|
||||
continue
|
||||
else:
|
||||
printSyntaxAndQuit()
|
||||
elif arg == '--set-precision':
|
||||
if i < len(args):
|
||||
getcontext().prec = int(args[i])
|
||||
i += 1
|
||||
continue
|
||||
else:
|
||||
printSyntaxAndQuit()
|
||||
elif arg in validOptions :
|
||||
options.append(arg)
|
||||
else :
|
||||
print 'Error! Invalid argument:', arg
|
||||
printSyntaxAndQuit()
|
||||
def parse_args(args=None):
|
||||
options, rargs = _options_parser.parse_args(args)
|
||||
|
||||
return (input, output, options, inputfilename, outputfilename)
|
||||
if rargs:
|
||||
parser.error("Additional arguments not handled: %r" % rargs)
|
||||
if options.digits < 0:
|
||||
parser.error("Can't have negative significant digits")
|
||||
if options.infilename:
|
||||
infile = maybe_gziped_file(options.infilename)
|
||||
# GZ: could catch a raised IOError here and report
|
||||
else:
|
||||
# GZ: could sniff for gzip compression here
|
||||
infile = sys.stdin
|
||||
if options.outfilename:
|
||||
outfile = maybe_gziped_file(options.outfilename, "w")
|
||||
else:
|
||||
outfile = sys.stdout
|
||||
|
||||
return options, [infile, outfile]
|
||||
|
||||
if __name__ == '__main__':
|
||||
if sys.platform == "win32":
|
||||
from time import clock as get_tick
|
||||
else:
|
||||
# GZ: is this different from time.time() in any way?
|
||||
def get_tick():
|
||||
return os.times()[0]
|
||||
|
||||
startTimes = os.times()
|
||||
start = get_tick()
|
||||
|
||||
(input, output, options, inputfilename, outputfilename) = parseCLA()
|
||||
options, (input, output) = parse_args()
|
||||
|
||||
# if we are not sending to stdout, then print out app information
|
||||
bOutputReport = False
|
||||
if output != sys.stdout :
|
||||
bOutputReport = True
|
||||
printHeader()
|
||||
print >>sys.stderr, "%s %s\n%s" % (APP, VER, COPYRIGHT)
|
||||
|
||||
# do the work
|
||||
in_string = input.read()
|
||||
out_string = scourString(in_string, options)
|
||||
output.write(out_string.encode("utf-8"))
|
||||
out_string = scourString(in_string, options).encode("UTF-8")
|
||||
output.write(out_string)
|
||||
|
||||
# Close input and output files
|
||||
input.close()
|
||||
output.close()
|
||||
|
||||
endTimes = os.times()
|
||||
end = get_tick()
|
||||
|
||||
# output some statistics if we are not using stdout
|
||||
if bOutputReport :
|
||||
if inputfilename != '':
|
||||
print ' File:', inputfilename
|
||||
print ' Time taken:', str(endTimes[0]-startTimes[0]) + 's'
|
||||
print ' Number of elements removed:', numElemsRemoved
|
||||
print ' Number of attributes removed:', numAttrsRemoved
|
||||
print ' Number of unreferenced id attributes removed:', numIDsRemoved
|
||||
print ' Number of style properties fixed:', numStylePropsFixed
|
||||
print ' Number of raster images embedded inline:', numRastersEmbedded
|
||||
print ' Number of path segments reduced/removed:', numPathSegmentsReduced
|
||||
print ' Number of bytes saved in path data:', numBytesSavedInPathData
|
||||
print ' Number of bytes saved in colors:', numBytesSavedInColors
|
||||
oldsize = os.path.getsize(inputfilename)
|
||||
newsize = os.path.getsize(outputfilename)
|
||||
sizediff = (newsize / oldsize) * 100;
|
||||
print ' Original file size:', oldsize, 'bytes; new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'
|
||||
# GZ: unless silenced by -q or something?
|
||||
# GZ: not using globals would be good too
|
||||
print >>sys.stderr, ' File:', input.name, \
|
||||
'\n Time taken:', str(end-start) + 's', \
|
||||
'\n Number of elements removed:', numElemsRemoved, \
|
||||
'\n Number of attributes removed:', numAttrsRemoved, \
|
||||
'\n Number of unreferenced id attributes removed:', numIDsRemoved, \
|
||||
'\n Number of style properties fixed:', numStylePropsFixed, \
|
||||
'\n Number of raster images embedded inline:', numRastersEmbedded, \
|
||||
'\n Number of path segments reduced/removed:', numPathSegmentsReduced, \
|
||||
'\n Number of bytes saved in path data:', numBytesSavedInPathData, \
|
||||
'\n Number of bytes saved in colors:', numBytesSavedInColors
|
||||
oldsize = len(in_string)
|
||||
newsize = len(out_string)
|
||||
sizediff = (newsize / oldsize) * 100
|
||||
print >>sys.stderr, ' Original file size:', oldsize, 'bytes;', \
|
||||
'new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -156,7 +156,8 @@ class KeepUnreferencedIDsWhenEnabled(unittest.TestCase):
|
|||
|
||||
class RemoveUnreferencedIDsWhenEnabled(unittest.TestCase):
|
||||
def runTest(self):
|
||||
doc = scour.scourXmlFile('unittests/ids-to-strip.svg', ['--enable-id-stripping'])
|
||||
doc = scour.scourXmlFile('unittests/ids-to-strip.svg',
|
||||
scour.parse_args(['--enable-id-stripping'])[0])
|
||||
self.assertEquals(doc.getElementsByTagNameNS(SVGNS, 'svg')[0].getAttribute('id'), '',
|
||||
'<svg> ID not stripped' )
|
||||
|
||||
|
|
@ -168,7 +169,8 @@ class RemoveUselessNestedGroups(unittest.TestCase):
|
|||
|
||||
class DoNotRemoveUselessNestedGroups(unittest.TestCase):
|
||||
def runTest(self):
|
||||
doc = scour.scourXmlFile('unittests/nested-useless-groups.svg', ['--disable-group-collapsing'])
|
||||
doc = scour.scourXmlFile('unittests/nested-useless-groups.svg',
|
||||
scour.parse_args(['--disable-group-collapsing'])[0])
|
||||
self.assertEquals(len(doc.getElementsByTagNameNS(SVGNS, 'g')), 2,
|
||||
'Useless nested groups were removed despite --disable-group-collapsing' )
|
||||
|
||||
|
|
@ -388,7 +390,8 @@ class RemoveFillOpacityWhenFillNone(unittest.TestCase):
|
|||
|
||||
class ConvertFillPropertyToAttr(unittest.TestCase):
|
||||
def runTest(self):
|
||||
doc = scour.scourXmlFile('unittests/fill-none.svg', '--disable-simplify-colors')
|
||||
doc = scour.scourXmlFile('unittests/fill-none.svg',
|
||||
scour.parse_args(['--disable-simplify-colors'])[0])
|
||||
self.assertEquals(doc.getElementsByTagNameNS(SVGNS, 'path')[1].getAttribute('fill'), 'black',
|
||||
'fill property not converted to XML attribute' )
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue