Rewrite to use optparse for the user interface, picking up a few minor bug fixes and improvements in the process, while trying to keep behaviour otherwise unchanged.

2009-05-20 19:22:57 +01:00 · 2009-05-20 19:22:57 +01:00 · d8ffea56e5
commit d8ffea56e5
parent 4bbb5923b3
4 changed files with 112 additions and 118 deletions
--- a/1
+++ b/1
@ -8,3 +8,4 @@ Thanks to the following contributors to scour:
 * Martin:
 	- better methods of handling string-to-float conversions in Python
 	- document functions in the traditional Python way
 	- rewrite option parsing code
--- a/crunch.sh
+++ b/crunch.sh
@ -3,6 +3,6 @@ mkdir $1
 for FILE in `ls fulltests`
 do
 	echo Doing $FILE:
-	./scour.py -i fulltests/$FILE -o $1/$FILE >> $1/report.txt
+	./scour.py -i fulltests/$FILE -o $1/$FILE 2>> $1/report.txt
 done
--- a/scour.py
+++ b/scour.py
@ -57,14 +57,11 @@ import xml.dom.minidom
 import re
 import math
 import base64
 import os.path
 import urllib
 from svg_regex import svg_parser
 from decimal import *
 import gzip
-
+import optparse
 # set precision to 5 decimal places by default
 getcontext().prec = 5
 APP = 'scour'
 VER = '0.14'
@ -829,7 +826,7 @@ def repairStyle(node, options):
 		# now if any of the properties match known SVG attributes we prefer attributes 
 		# over style so emit them and remove them from the style map
-		if not '--disable-style-to-xml' in options:
+		if options.style_to_xml:
 			for propName in styleMap.keys() :
 				if propName in svgAttributes :
 					node.setAttribute(propName, styleMap[propName])
@ -1313,7 +1310,10 @@ def properlySizeDoc(docElement):
 # this is the main method
 # input is a string representation of the input XML
 # returns a string representation of the output XML
-def scourString(in_string, options=[]):
+def scourString(in_string, options=None):
 	if options is None:
 		options = _options_parser.get_default_values()
 	getcontext().prec = options.digits
 	global numAttrsRemoved
 	global numStylePropsFixed
 	global numElemsRemoved
@ -1343,7 +1343,7 @@ def scourString(in_string, options=[]):
 	numStylePropsFixed = repairStyle(doc.documentElement, options)
 	# convert colors to #RRGGBB format
-	if not '--disable-simplify-colors' in options:
+	if options.simple_colors:
 		numBytesSavedInColors = convertColors(doc.documentElement)
 	# remove empty defs, metadata, g
@ -1366,14 +1366,14 @@ def scourString(in_string, options=[]):
 	while removeUnreferencedElements(doc) > 0:
 		pass
-	if '--enable-id-stripping' in options:
+	if options.strip_ids:
 		bContinueLooping = True
 		while bContinueLooping:
 			identifiedElements = findElementsWithId(doc.documentElement)
 			referencedIDs = findReferencedElements(doc.documentElement)
 			bContinueLooping = (removeUnreferencedIDs(referencedIDs, identifiedElements) > 0)
-	if not '--disable-group-collapsing' in options:
+	if options.group_collapse:
 		while removeNestedGroups(doc.documentElement) > 0:
 			pass
@ -1417,133 +1417,123 @@ def scourString(in_string, options=[]):
 # used mostly by unit tests
 # input is a filename
 # returns the minidom doc representation of the SVG
-def scourXmlFile(filename, options=[]):
+def scourXmlFile(filename, options=None):
 	in_string = open(filename).read()
 #	print 'IN=',in_string
 	out_string = scourString(in_string, options)
 #	print 'OUT=',out_string
 	return xml.dom.minidom.parseString(out_string.encode('utf-8'))
-def printHeader():
+# GZ: Most other command-line tools don't seem to do this; is it really wanted?
-	print APP , VER
+class HeaderedFormatter(optparse.IndentedHelpFormatter):
-	print COPYRIGHT
+	"""
 	Show application name, version number, and copyright statement
 	above usage information.
 	"""
 	def format_usage(self, usage):
 		return "%s %s\n%s\n%s" % (APP, VER, COPYRIGHT,
 			optparse.IndentedHelpFormatter.format_usage(self, usage))
-def printSyntaxAndQuit():
+# GZ: would prefer this to be in a function or class scope, but tests etc need
-	printHeader()
+#     access to the defaults anyway
-	print 'usage: scour.py [-i input.svg] [-o output.svg] [OPTIONS]\n'
+_options_parser = optparse.OptionParser(
-	print 'If the input/output files are specified with a svgz extension, then compressed SVG is assumed.\n'
+	usage="%prog [-i input.svg] [-o output.svg] [OPTIONS]",
-	print 'If the input file is not specified, stdin is used.'
+	description=("If the input/output files are specified with a svgz"
-	print 'If the output file is not specified, stdout is used.'
+	" extension, then compressed SVG is assumed. If the input file is not"
-	print 'If an option is not available below that means it occurs automatically'
+	" specified, stdin is used. If the output file is not specified, "
-	print 'when scour is invoked.  Available OPTIONS:\n'
+	" stdout is used. If an option is not available below that means it"
-	print '  --disable-simplify-colors  : Scour will not convert all colors to #RRGGBB format'
+	" occurs automatically."),
-	print '  --disable-style-to-xml     : Scour will not convert style properties into XML attributes'
+	formatter=HeaderedFormatter(max_help_position=30),
-	print '  --disable-group-collapsing : Scour will not collapse <g> elements'
+	version=VER)
 	print '  --enable-id-stripping      : Scour will remove all un-referenced ID attributes'
 	print '  --set-precision N          : Scour will set the number of significant digits (default: 6)'
 	print ''
 	quit()	
-# returns a tuple with:
+_options_parser.add_option("--disable-simplify-colors",
-# input stream, output stream, a list of options specified on the command-line, 
+	action="store_false", dest="simple_colors", default=True,
-# input filename, and output filename
+	help="won't convert all colors to #RRGGBB format")
-def parseCLA():
+_options_parser.add_option("--disable-style-to-xml",
-	args = sys.argv[1:]
+	action="store_false", dest="style_to_xml", default=True,
 	help="won't convert styles into XML attributes")
 _options_parser.add_option("--disable-group-collapsing",
 	action="store_false", dest="group_collapse", default=True,
 	help="won't collapse <g> elements")
 _options_parser.add_option("--enable-id-stripping",
 	action="store_true", dest="strip_ids", default=False,
 	help="remove all un-referenced ID attributes")
 # GZ: this is confusing, most people will be thinking in terms of
 #     decimal places, which is not what decimal precision is doing
 _options_parser.add_option("-p", "--set-precision",
 	action="store", type=int, dest="digits", default=5,
 	help="set number of significant digits (default: %default)")
 _options_parser.add_option("-i",
 	action="store", dest="infilename", help=optparse.SUPPRESS_HELP)
 _options_parser.add_option("-o",
 	action="store", dest="outfilename", help=optparse.SUPPRESS_HELP)
-	# by default the input and output are the standard streams
+def maybe_gziped_file(filename, mode="r"):
-	inputfilename = ''
+	if os.path.splitext(filename)[1].lower() in (".svgz", ".gz"):
-	outputfilename = ''
+		return gzip.GzipFile(filename, mode)
-	input = sys.stdin
+	return file(filename, mode)
-	output = sys.stdout
+
-	options = []
+def parse_args(args=None):
-	validOptions = [
+	options, rargs = _options_parser.parse_args(args)
-					'--disable-simplify-colors',
+
-					'--disable-style-to-xml',
+	if rargs:
-					'--disable-group-collapsing',
+		parser.error("Additional arguments not handled: %r" % rargs)
-					'--enable-id-stripping',
+	if options.digits < 0:
-					'--set-precision',
+		parser.error("Can't have negative significant digits")
-					]
+	if options.infilename:
-					
+		infile = maybe_gziped_file(options.infilename)
-	i = 0
+		# GZ: could catch a raised IOError here and report
-	while i < len(args):
+	else:
-		arg = args[i]
+		# GZ: could sniff for gzip compression here
-		i += 1
+		infile = sys.stdin
-		if arg == '-i' :
+	if options.outfilename:
-			if i < len(args) :
+		outfile = maybe_gziped_file(options.outfilename, "w")
-				inputfilename = args[i]
+	else:
-				if args[i][-5:] == '.svgz':
+		outfile = sys.stdout
-					input = gzip.open(args[i], 'rb')
+
-				else:
+	return options, [infile, outfile]
 					input = open(args[i], 'r')
 				i += 1
 				continue
 			else:
 				printSyntaxAndQuit()
 		elif arg == '-o' :
 			if i < len(args) :
 				outputfilename = args[i]
 				if args[i][-5:] == '.svgz':
 					output = gzip.open(args[i], 'wb')
 				else:
 					output = open(args[i], 'w')
 				i += 1
 				continue
 			else:
 				printSyntaxAndQuit()
 		elif arg == '--set-precision':
 			if i < len(args):
 				getcontext().prec = int(args[i])
 				i += 1
 				continue
 			else:
 				printSyntaxAndQuit()
 		elif arg in validOptions :
 			options.append(arg)
 		else :
 			print 'Error!  Invalid argument:', arg
 			printSyntaxAndQuit()
 	return (input, output, options, inputfilename, outputfilename)
 if __name__ == '__main__':
 	if sys.platform == "win32":
 		from time import clock as get_tick
 	else:
 		# GZ: is this different from time.time() in any way?
 		def get_tick():
 			return os.times()[0]
-	startTimes = os.times()
+	start = get_tick()
-	(input, output, options, inputfilename, outputfilename) = parseCLA()
+	options, (input, output) = parse_args()
-	# if we are not sending to stdout, then print out app information
+	print >>sys.stderr, "%s %s\n%s" % (APP, VER, COPYRIGHT)
 	bOutputReport = False
 	if output != sys.stdout :
 		bOutputReport = True
 		printHeader()
 	# do the work
 	in_string = input.read()
-	out_string = scourString(in_string, options)
+	out_string = scourString(in_string, options).encode("UTF-8")
-	output.write(out_string.encode("utf-8"))
+	output.write(out_string)
 	# Close input and output files
 	input.close()
 	output.close()
-	endTimes = os.times()
+	end = get_tick()
-	# output some statistics if we are not using stdout
+	# GZ: unless silenced by -q or something?
-	if bOutputReport :
+	# GZ: not using globals would be good too
-	    if inputfilename != '': 
+	print >>sys.stderr, ' File:', input.name, \
-	    	print ' File:', inputfilename
+		'\n Time taken:', str(end-start) + 's', \
-		print ' Time taken:', str(endTimes[0]-startTimes[0]) + 's'
+		'\n Number of elements removed:', numElemsRemoved, \
-		print ' Number of elements removed:', numElemsRemoved
+		'\n Number of attributes removed:', numAttrsRemoved, \
-		print ' Number of attributes removed:', numAttrsRemoved
+		'\n Number of unreferenced id attributes removed:', numIDsRemoved, \
-		print ' Number of unreferenced id attributes removed:', numIDsRemoved 
+		'\n Number of style properties fixed:', numStylePropsFixed, \
-		print ' Number of style properties fixed:', numStylePropsFixed
+		'\n Number of raster images embedded inline:', numRastersEmbedded, \
-		print ' Number of raster images embedded inline:', numRastersEmbedded
+		'\n Number of path segments reduced/removed:', numPathSegmentsReduced, \
-		print ' Number of path segments reduced/removed:', numPathSegmentsReduced
+		'\n Number of bytes saved in path data:', numBytesSavedInPathData, \
-		print ' Number of bytes saved in path data:', numBytesSavedInPathData
+		'\n Number of bytes saved in colors:', numBytesSavedInColors
-		print ' Number of bytes saved in colors:', numBytesSavedInColors
+	oldsize = len(in_string)
-		oldsize = os.path.getsize(inputfilename)
+	newsize = len(out_string)
-		newsize = os.path.getsize(outputfilename)
+	sizediff = (newsize / oldsize) * 100
-		sizediff = (newsize / oldsize) * 100;
+	print >>sys.stderr, ' Original file size:', oldsize, 'bytes;', \
-		print ' Original file size:', oldsize, 'bytes; new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'
+		'new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'
--- a/testscour.py
+++ b/testscour.py
@ -156,7 +156,8 @@ class KeepUnreferencedIDsWhenEnabled(unittest.TestCase):
 class RemoveUnreferencedIDsWhenEnabled(unittest.TestCase):
 	def runTest(self):
-		doc = scour.scourXmlFile('unittests/ids-to-strip.svg', ['--enable-id-stripping'])
+		doc = scour.scourXmlFile('unittests/ids-to-strip.svg',
 			scour.parse_args(['--enable-id-stripping'])[0])
 		self.assertEquals(doc.getElementsByTagNameNS(SVGNS, 'svg')[0].getAttribute('id'), '',
 			'<svg> ID not stripped' )
@ -168,7 +169,8 @@ class RemoveUselessNestedGroups(unittest.TestCase):
 class DoNotRemoveUselessNestedGroups(unittest.TestCase):
 	def runTest(self):
-		doc = scour.scourXmlFile('unittests/nested-useless-groups.svg', ['--disable-group-collapsing'])
+		doc = scour.scourXmlFile('unittests/nested-useless-groups.svg',
 			scour.parse_args(['--disable-group-collapsing'])[0])
 		self.assertEquals(len(doc.getElementsByTagNameNS(SVGNS, 'g')), 2,
 			'Useless nested groups were removed despite --disable-group-collapsing' )
@ -388,7 +390,8 @@ class RemoveFillOpacityWhenFillNone(unittest.TestCase):
 class ConvertFillPropertyToAttr(unittest.TestCase):
 	def runTest(self):
-		doc = scour.scourXmlFile('unittests/fill-none.svg', '--disable-simplify-colors')
+		doc = scour.scourXmlFile('unittests/fill-none.svg',
 			scour.parse_args(['--disable-simplify-colors'])[0])
 		self.assertEquals(doc.getElementsByTagNameNS(SVGNS, 'path')[1].getAttribute('fill'), 'black',
 			'fill property not converted to XML attribute' )