Remove XML pretty printing due to erroneous injection of whitespace on text nodes

2009-04-28 13:18:29 -05:00 · 2009-04-28 13:18:29 -05:00 · 02602edde0
commit 02602edde0
parent f37fef89df
2 changed files with 30 additions and 16 deletions
--- a/release-notes.html
+++ b/release-notes.html
@@ -9,6 +9,24 @@
 <p>Copyright 2009, Jeff Schiller</p>
 <section id="0.11">
 	<header>
 		<h2><a href="#0.11">Version 0.11</a></h2>
 	</header>
 	<ul>
 		<li>convert gradient stop offsets from percentages to float</li>
 		<li>convert gradient stop offsets to integers if possible (0 or 1)</li>
 		<li>fix bug in line-to-hz conversion</li>
 		<li>handle non-ASCII characters (Unicode)</li>
 		<li>remove empty line or curve segments from path</li>
 		<li>added option to prevent style-to-xml conversion</li>
 		<li>handle compressed svg (svgz) on the input and output</li>
 		<li>added total time taken to the report</li>
 		<li>Removed XML pretty printing because of <a href="http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/">this problem</a>.</li>
 	</ul>
 </section>
 <section id="0.10">
 	<header>
 		<h2><a href="#0.10">Version 0.10</a></h2>
--- a/scour.py
+++ b/scour.py
@@ -34,8 +34,6 @@
 #  * Convert RGB colours from RGB(r,g,b) to #RRGGBB format
 #  * Convert RGB colours from #RRGGBB to #RGB if possible
 # * Clean up paths
 #  * Detect vertical/horizontal lines and replace.
 #  * Eliminate empty path segments
 #  * Eliminate last segment in a polygon
 #  * Collapse straight curves.
 # * Process Transformations
@@ -46,14 +44,7 @@
 #  * Put id attributes first in the serialization (or make the d attribute last)
 # Next Up:
-# + convert gradient stop offsets from percentages to float
+# - text elements are shoved over to the right
 # + convert gradient stop offsets to integers if possible (0 or 1)
 # + fix bug in line-to-hz conversion
 # + handle non-ASCII characters (Unicode)
 # + remove empty line or curve segments from path
 # + added option to prevent style-to-xml conversion
 # + handle compressed svg (svgz) on the input and output
 # - display how long it took to scour the file in the report
 # - prevent elements from being stripped if they are referenced in a <style> element
 #   (for instance, filter, marker, pattern) - need a crude CSS parser
 # - Remove unnecessary units of precision on attributes (use decimal:
@@ -669,7 +660,6 @@ def cleanPath(element) :
 	# however, this parser object has some ugliness in it (lists of tuples of tuples of 
 	# numbers and booleans).  we just need a list of (cmd,[numbers]):
 	# TODO: remove empty path segments	
 	path = []
 	for (cmd,dataset) in pathObj:
 		if cmd in ['M','m','L','l','T','t']:
@@ -688,10 +678,7 @@ def cleanPath(element) :
 			# one or more numbers
 			nums = []
 			for n in dataset:
-				if n != 0:
+				nums.append(Decimal(str(n)))
 					nums.append(Decimal(str(n)))
 				else:
 					numPathSegmentsRemoved += 1
 			if nums:
 				path.append( (cmd, nums) )
@@ -1072,7 +1059,10 @@ def scourString(in_string, options=[]):
 	properlySizeDoc(doc.documentElement)
 	# output the document as a pretty string with a single space for indent
-	out_string = doc.documentElement.toprettyxml(' ')
+	# NOTE: removed pretty printing because of this problem:
 	# http://ronrothman.com/public/leftbraned/xml-dom-minidom-toprettyxml-and-silly-whitespace/
 #	out_string = doc.documentElement.toprettyxml(' ')
 	out_string = doc.documentElement.toxml()
 	# now strip out empty lines
 	lines = []
@@ -1165,6 +1155,9 @@ def parseCLA():
 if __name__ == '__main__':
 	startTimes = os.times()
 #	print times[0], times[1]
 	(input, output, options, inputfilename, outputfilename) = parseCLA()
 	# if we are not sending to stdout, then print out app information
@@ -1182,10 +1175,13 @@ if __name__ == '__main__':
 	input.close()
 	output.close()
 	endTimes = os.times()
 	# output some statistics if we are not using stdout
 	if bOutputReport :
 	    if inputfilename != '': 
 	    	print ' File:', inputfilename
 		print ' Time taken:', str(endTimes[0]-startTimes[0]) + 's'
 		print ' Number of unreferenced id attributes removed:', numIDsRemoved 
 		print ' Number of elements removed:', numElemsRemoved
 		print ' Number of attributes removed:', numAttrsRemoved