Rewrite to use optparse for the user interface, picking up a few minor bug fixes and improvements in the process, while trying to keep behaviour otherwise unchanged.

This commit is contained in:
Martin 2009-05-20 19:22:57 +01:00
parent 4bbb5923b3
commit d8ffea56e5
4 changed files with 112 additions and 118 deletions

View file

@ -8,3 +8,4 @@ Thanks to the following contributors to scour:
* Martin: * Martin:
- better methods of handling string-to-float conversions in Python - better methods of handling string-to-float conversions in Python
- document functions in the traditional Python way - document functions in the traditional Python way
- rewrite option parsing code

View file

@ -3,6 +3,6 @@ mkdir $1
for FILE in `ls fulltests` for FILE in `ls fulltests`
do do
echo Doing $FILE: echo Doing $FILE:
./scour.py -i fulltests/$FILE -o $1/$FILE >> $1/report.txt ./scour.py -i fulltests/$FILE -o $1/$FILE 2>> $1/report.txt
done done

218
scour.py
View file

@ -57,14 +57,11 @@ import xml.dom.minidom
import re import re
import math import math
import base64 import base64
import os.path
import urllib import urllib
from svg_regex import svg_parser from svg_regex import svg_parser
from decimal import * from decimal import *
import gzip import gzip
import optparse
# set precision to 5 decimal places by default
getcontext().prec = 5
APP = 'scour' APP = 'scour'
VER = '0.14' VER = '0.14'
@ -829,7 +826,7 @@ def repairStyle(node, options):
# now if any of the properties match known SVG attributes we prefer attributes # now if any of the properties match known SVG attributes we prefer attributes
# over style so emit them and remove them from the style map # over style so emit them and remove them from the style map
if not '--disable-style-to-xml' in options: if options.style_to_xml:
for propName in styleMap.keys() : for propName in styleMap.keys() :
if propName in svgAttributes : if propName in svgAttributes :
node.setAttribute(propName, styleMap[propName]) node.setAttribute(propName, styleMap[propName])
@ -1313,7 +1310,10 @@ def properlySizeDoc(docElement):
# this is the main method # this is the main method
# input is a string representation of the input XML # input is a string representation of the input XML
# returns a string representation of the output XML # returns a string representation of the output XML
def scourString(in_string, options=[]): def scourString(in_string, options=None):
if options is None:
options = _options_parser.get_default_values()
getcontext().prec = options.digits
global numAttrsRemoved global numAttrsRemoved
global numStylePropsFixed global numStylePropsFixed
global numElemsRemoved global numElemsRemoved
@ -1343,7 +1343,7 @@ def scourString(in_string, options=[]):
numStylePropsFixed = repairStyle(doc.documentElement, options) numStylePropsFixed = repairStyle(doc.documentElement, options)
# convert colors to #RRGGBB format # convert colors to #RRGGBB format
if not '--disable-simplify-colors' in options: if options.simple_colors:
numBytesSavedInColors = convertColors(doc.documentElement) numBytesSavedInColors = convertColors(doc.documentElement)
# remove empty defs, metadata, g # remove empty defs, metadata, g
@ -1366,14 +1366,14 @@ def scourString(in_string, options=[]):
while removeUnreferencedElements(doc) > 0: while removeUnreferencedElements(doc) > 0:
pass pass
if '--enable-id-stripping' in options: if options.strip_ids:
bContinueLooping = True bContinueLooping = True
while bContinueLooping: while bContinueLooping:
identifiedElements = findElementsWithId(doc.documentElement) identifiedElements = findElementsWithId(doc.documentElement)
referencedIDs = findReferencedElements(doc.documentElement) referencedIDs = findReferencedElements(doc.documentElement)
bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0) bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0)
if not '--disable-group-collapsing' in options: if options.group_collapse:
while removeNestedGroups(doc.documentElement) > 0: while removeNestedGroups(doc.documentElement) > 0:
pass pass
@ -1417,133 +1417,123 @@ def scourString(in_string, options=[]):
# used mostly by unit tests # used mostly by unit tests
# input is a filename # input is a filename
# returns the minidom doc representation of the SVG # returns the minidom doc representation of the SVG
def scourXmlFile(filename, options=[]): def scourXmlFile(filename, options=None):
in_string = open(filename).read() in_string = open(filename).read()
# print 'IN=',in_string # print 'IN=',in_string
out_string = scourString(in_string, options) out_string = scourString(in_string, options)
# print 'OUT=',out_string # print 'OUT=',out_string
return xml.dom.minidom.parseString(out_string.encode('utf-8')) return xml.dom.minidom.parseString(out_string.encode('utf-8'))
def printHeader(): # GZ: Seems most other commandline tools don't do this, is it really wanted?
print APP , VER class HeaderedFormatter(optparse.IndentedHelpFormatter):
print COPYRIGHT """
Show application name, version number, and copyright statement
above usage information.
"""
def format_usage(self, usage):
return "%s %s\n%s\n%s" % (APP, VER, COPYRIGHT,
optparse.IndentedHelpFormatter.format_usage(self, usage))
def printSyntaxAndQuit(): # GZ: would prefer this to be in a function or class scope, but tests etc need
printHeader() # access to the defaults anyway
print 'usage: scour.py [-i input.svg] [-o output.svg] [OPTIONS]\n' _options_parser = optparse.OptionParser(
print 'If the input/output files are specified with a svgz extension, then compressed SVG is assumed.\n' usage="%prog [-i input.svg] [-o output.svg] [OPTIONS]",
print 'If the input file is not specified, stdin is used.' description=("If the input/output files are specified with a svgz"
print 'If the output file is not specified, stdout is used.' " extension, then compressed SVG is assumed. If the input file is not"
print 'If an option is not available below that means it occurs automatically' " specified, stdin is used. If the output file is not specified, "
print 'when scour is invoked. Available OPTIONS:\n' " stdout is used. If an option is not available below that means it"
print ' --disable-simplify-colors : Scour will not convert all colors to #RRGGBB format' " occurs automatically."),
print ' --disable-style-to-xml : Scour will not convert style properties into XML attributes' formatter=HeaderedFormatter(max_help_position=30),
print ' --disable-group-collapsing : Scour will not collapse <g> elements' version=VER)
print ' --enable-id-stripping : Scour will remove all un-referenced ID attributes'
print ' --set-precision N : Scour will set the number of significant digits (default: 6)'
print ''
quit()
# returns a tuple with: _options_parser.add_option("--disable-simplify-colors",
# input stream, output stream, a list of options specified on the command-line, action="store_false", dest="simple_colors", default=True,
# input filename, and output filename help="won't convert all colors to #RRGGBB format")
def parseCLA(): _options_parser.add_option("--disable-style-to-xml",
args = sys.argv[1:] action="store_false", dest="style_to_xml", default=True,
help="won't convert styles into XML attributes")
_options_parser.add_option("--disable-group-collapsing",
action="store_false", dest="group_collapse", default=True,
help="won't collapse <g> elements")
_options_parser.add_option("--enable-id-stripping",
action="store_true", dest="strip_ids", default=False,
help="remove all un-referenced ID attributes")
# GZ: this is confusing, most people will be thinking in terms of
# decimal places, which is not what decimal precision is doing
_options_parser.add_option("-p", "--set-precision",
action="store", type=int, dest="digits", default=5,
help="set number of significant digits (default: %default)")
_options_parser.add_option("-i",
action="store", dest="infilename", help=optparse.SUPPRESS_HELP)
_options_parser.add_option("-o",
action="store", dest="outfilename", help=optparse.SUPPRESS_HELP)
# by default the input and output are the standard streams def maybe_gziped_file(filename, mode="r"):
inputfilename = '' if os.path.splitext(filename)[1].lower() in (".svgz", ".gz"):
outputfilename = '' return gzip.GzipFile(filename, mode)
input = sys.stdin return file(filename, mode)
output = sys.stdout
options = [] def parse_args(args=None):
validOptions = [ options, rargs = _options_parser.parse_args(args)
'--disable-simplify-colors',
'--disable-style-to-xml', if rargs:
'--disable-group-collapsing', parser.error("Additional arguments not handled: %r" % rargs)
'--enable-id-stripping', if options.digits < 0:
'--set-precision', parser.error("Can't have negative significant digits")
] if options.infilename:
infile = maybe_gziped_file(options.infilename)
i = 0 # GZ: could catch a raised IOError here and report
while i < len(args): else:
arg = args[i] # GZ: could sniff for gzip compression here
i += 1 infile = sys.stdin
if arg == '-i' : if options.outfilename:
if i < len(args) : outfile = maybe_gziped_file(options.outfilename, "w")
inputfilename = args[i] else:
if args[i][-5:] == '.svgz': outfile = sys.stdout
input = gzip.open(args[i], 'rb')
else: return options, [infile, outfile]
input = open(args[i], 'r')
i += 1
continue
else:
printSyntaxAndQuit()
elif arg == '-o' :
if i < len(args) :
outputfilename = args[i]
if args[i][-5:] == '.svgz':
output = gzip.open(args[i], 'wb')
else:
output = open(args[i], 'w')
i += 1
continue
else:
printSyntaxAndQuit()
elif arg == '--set-precision':
if i < len(args):
getcontext().prec = int(args[i])
i += 1
continue
else:
printSyntaxAndQuit()
elif arg in validOptions :
options.append(arg)
else :
print 'Error! Invalid argument:', arg
printSyntaxAndQuit()
return (input, output, options, inputfilename, outputfilename)
if __name__ == '__main__': if __name__ == '__main__':
if sys.platform == "win32":
from time import clock as get_tick
else:
# GZ: is this different from time.time() in any way?
def get_tick():
return os.times()[0]
startTimes = os.times() start = get_tick()
(input, output, options, inputfilename, outputfilename) = parseCLA() options, (input, output) = parse_args()
# if we are not sending to stdout, then print out app information print >>sys.stderr, "%s %s\n%s" % (APP, VER, COPYRIGHT)
bOutputReport = False
if output != sys.stdout :
bOutputReport = True
printHeader()
# do the work # do the work
in_string = input.read() in_string = input.read()
out_string = scourString(in_string, options) out_string = scourString(in_string, options).encode("UTF-8")
output.write(out_string.encode("utf-8")) output.write(out_string)
# Close input and output files # Close input and output files
input.close() input.close()
output.close() output.close()
endTimes = os.times() end = get_tick()
# output some statistics if we are not using stdout # GZ: unless silenced by -q or something?
if bOutputReport : # GZ: not using globals would be good too
if inputfilename != '': print >>sys.stderr, ' File:', input.name, \
print ' File:', inputfilename '\n Time taken:', str(end-start) + 's', \
print ' Time taken:', str(endTimes[0]-startTimes[0]) + 's' '\n Number of elements removed:', numElemsRemoved, \
print ' Number of elements removed:', numElemsRemoved '\n Number of attributes removed:', numAttrsRemoved, \
print ' Number of attributes removed:', numAttrsRemoved '\n Number of unreferenced id attributes removed:', numIDsRemoved, \
print ' Number of unreferenced id attributes removed:', numIDsRemoved '\n Number of style properties fixed:', numStylePropsFixed, \
print ' Number of style properties fixed:', numStylePropsFixed '\n Number of raster images embedded inline:', numRastersEmbedded, \
print ' Number of raster images embedded inline:', numRastersEmbedded '\n Number of path segments reduced/removed:', numPathSegmentsReduced, \
print ' Number of path segments reduced/removed:', numPathSegmentsReduced '\n Number of bytes saved in path data:', numBytesSavedInPathData, \
print ' Number of bytes saved in path data:', numBytesSavedInPathData '\n Number of bytes saved in colors:', numBytesSavedInColors
print ' Number of bytes saved in colors:', numBytesSavedInColors oldsize = len(in_string)
oldsize = os.path.getsize(inputfilename) newsize = len(out_string)
newsize = os.path.getsize(outputfilename) sizediff = (newsize / oldsize) * 100
sizediff = (newsize / oldsize) * 100; print >>sys.stderr, ' Original file size:', oldsize, 'bytes;', \
print ' Original file size:', oldsize, 'bytes; new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)' 'new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'

View file

@ -156,7 +156,8 @@ class KeepUnreferencedIDsWhenEnabled(unittest.TestCase):
class RemoveUnreferencedIDsWhenEnabled(unittest.TestCase): class RemoveUnreferencedIDsWhenEnabled(unittest.TestCase):
def runTest(self): def runTest(self):
doc = scour.scourXmlFile('unittests/ids-to-strip.svg', ['--enable-id-stripping']) doc = scour.scourXmlFile('unittests/ids-to-strip.svg',
scour.parse_args(['--enable-id-stripping'])[0])
self.assertEquals(doc.getElementsByTagNameNS(SVGNS, 'svg')[0].getAttribute('id'), '', self.assertEquals(doc.getElementsByTagNameNS(SVGNS, 'svg')[0].getAttribute('id'), '',
'<svg> ID not stripped' ) '<svg> ID not stripped' )
@ -168,7 +169,8 @@ class RemoveUselessNestedGroups(unittest.TestCase):
class DoNotRemoveUselessNestedGroups(unittest.TestCase): class DoNotRemoveUselessNestedGroups(unittest.TestCase):
def runTest(self): def runTest(self):
doc = scour.scourXmlFile('unittests/nested-useless-groups.svg', ['--disable-group-collapsing']) doc = scour.scourXmlFile('unittests/nested-useless-groups.svg',
scour.parse_args(['--disable-group-collapsing'])[0])
self.assertEquals(len(doc.getElementsByTagNameNS(SVGNS, 'g')), 2, self.assertEquals(len(doc.getElementsByTagNameNS(SVGNS, 'g')), 2,
'Useless nested groups were removed despite --disable-group-collapsing' ) 'Useless nested groups were removed despite --disable-group-collapsing' )
@ -388,7 +390,8 @@ class RemoveFillOpacityWhenFillNone(unittest.TestCase):
class ConvertFillPropertyToAttr(unittest.TestCase): class ConvertFillPropertyToAttr(unittest.TestCase):
def runTest(self): def runTest(self):
doc = scour.scourXmlFile('unittests/fill-none.svg', '--disable-simplify-colors') doc = scour.scourXmlFile('unittests/fill-none.svg',
scour.parse_args(['--disable-simplify-colors'])[0])
self.assertEquals(doc.getElementsByTagNameNS(SVGNS, 'path')[1].getAttribute('fill'), 'black', self.assertEquals(doc.getElementsByTagNameNS(SVGNS, 'path')[1].getAttribute('fill'), 'black',
'fill property not converted to XML attribute' ) 'fill property not converted to XML attribute' )