270 lines
8 KiB
Python
Executable file
270 lines
8 KiB
Python
Executable file
#!/usr/local/bin/python
|
|
# Scour
|
|
# Version 0.03
|
|
#
|
|
# Copyright 2009 Jeff Schiller
|
|
#
|
|
# This file is part of Scour, http://www.codedread.com/scour/
|
|
#
|
|
# Scour is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# Scour is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with Scour. If not, see http://www.gnu.org/licenses/ .
|
|
#
|
|
|
|
import sys
|
|
import string
|
|
import xml.dom.minidom
|
|
|
|
# Program identity, shown in the banner printed by printHeader().
APP = 'scour'
VER = '0.03'
COPYRIGHT = 'Copyright Jeff Schiller, 2009'

# XML namespace URIs used throughout the script, keyed by a short name.
NS = {
    'SVG': 'http://www.w3.org/2000/svg',
    'XLINK': 'http://www.w3.org/1999/xlink',
    'SODIPODI': 'http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd',
    'INKSCAPE': 'http://www.inkscape.org/namespaces/inkscape',
}
|
|
|
|
def printHeader():
    """Print the program banner to standard output.

    Emits two lines: the application name and version separated by a
    single space, then the copyright notice.
    """
    # Each call passes one pre-joined string, so the parenthesised form
    # prints exactly the same text under Python 2 and Python 3.
    print(APP + ' ' + VER)
    print(COPYRIGHT)
|
|
|
|
def printSyntaxAndQuit():
    """Print the banner and the usage text, then terminate the program.

    This function does not return: it raises SystemExit through
    sys.exit().  The exit status is 1 because it is only reached when the
    command line could not be understood.
    """
    printHeader()
    # The trailing '\n' leaves a blank line between the usage line and
    # the explanatory notes.
    print('usage: scour.py [-i input.svg] [-o output.svg]\n')
    print('If the input file is not specified, stdin is used.')
    print('If the output file is not specified, stdout is used.')
    # sys.exit() rather than quit(): quit() is a convenience installed by
    # the site module for interactive sessions and is not guaranteed to
    # exist in every run mode.
    sys.exit(1)
|
|
|
|
# parse command-line arguments
args = sys.argv[1:]

# by default the input and output are the standard streams
input = sys.stdin
output = sys.stdout

# The command line is zero, one or two "<flag> <path>" pairs, where the
# flag is -i (input file) or -o (output file).  The pairs may appear in
# either order, but each flag may be given at most once.
paths = {}
if len(args) in (2, 4):
    for flag, path in zip(args[0::2], args[1::2]):
        if flag not in ('-i', '-o') or flag in paths:
            printSyntaxAndQuit()
        paths[flag] = path
# anything else (other than no arguments at all) is invalid syntax
elif len(args) != 0:
    printSyntaxAndQuit()

# Open the input before the output: opening with 'w' truncates the
# output file, which should not happen if the input cannot be opened.
if '-i' in paths:
    input = open(paths['-i'], 'r')
if '-o' in paths:
    output = open(paths['-o'], 'w')

# if we are not sending to stdout, then print out app information
# (when the SVG itself goes to stdout the banner would corrupt it)
bOutputReport = output is not sys.stdout
if bOutputReport:
    printHeader()

# build DOM in memory
doc = xml.dom.minidom.parse(input)
|
|
|
|
def findElementsWithId(node, elems=None):
    """Collect the elements that carry an id attribute.

    Walks the subtree rooted at ``node`` (the node itself included) and
    records every element whose ``id`` attribute is a non-empty string.

    node  -- the DOM element to start from
    elems -- optional dict to add the results to; a fresh dict is created
             for each top-level call when it is omitted

    Returns the dict, mapping id value -> element.  When the same id
    occurs on several elements, the one visited last is kept.
    """
    # A literal {} default would be created once and shared by every call
    # that omits the argument, leaking ids from one call into the next.
    if elems is None:
        elems = {}

    elemId = node.getAttribute('id')
    if elemId != '':
        elems[elemId] = node

    for child in node.childNodes:
        # only Element nodes can carry attributes; skip text, comments, ...
        if child.nodeType == child.ELEMENT_NODE:
            findElementsWithId(child, elems)
    return elems
|
|
|
|
def findReferencedElements(node, ids=None):
    """Count how many times each id is referenced.

    Walks the subtree rooted at ``node`` (the node itself included) and
    looks for references in two places:

    * the ``xlink:href`` attribute, when it has the form ``#someId``;
    * the fill, stroke, filter, clip-path, mask, marker-start, marker-end
      and marker-mid properties, given either as attributes or inside the
      ``style`` attribute, when the value starts with ``url(#``.

    node -- the DOM element to start from
    ids  -- optional dict to accumulate into; a fresh dict is created for
            each top-level call when it is omitted

    Returns the dict, mapping referenced id -> number of references.

    Style declarations that do not split into exactly one property and
    one value on ':' are ignored.
    """
    # A literal {} default would be shared between calls, so counts from
    # an earlier call would leak into a later one.
    if ids is None:
        ids = {}

    # if xlink:href is set to a local reference, then grab the id
    href = node.getAttributeNS(NS['XLINK'], 'href')
    if len(href) > 1 and href[0] == '#':
        # we remove the hash mark from the beginning of the id
        refId = href[1:]
        ids[refId] = ids.get(refId, 0) + 1

    # now get all style properties and the referencing attributes, and
    # treat the attributes as if they were "prop:value" declarations too
    referencingProps = ['fill', 'stroke', 'filter', 'clip-path', 'mask',
                        'marker-start', 'marker-end', 'marker-mid']
    styles = node.getAttribute('style').split(';')
    for attr in referencingProps:
        styles.append(attr + ':' + node.getAttribute(attr))

    for style in styles:
        propval = style.split(':')
        if len(propval) != 2:
            continue
        prop = propval[0].strip()
        val = propval[1].strip()
        if prop in referencingProps and val[0:5] == 'url(#':
            end = val.find(')')
            if end == -1:
                # no closing parenthesis: take the rest of the value
                # instead of silently dropping its last character
                end = len(val)
            refId = val[5:end]
            ids[refId] = ids.get(refId, 0) + 1

    for child in node.childNodes:
        # only Element nodes have attributes to inspect
        if child.nodeType == child.ELEMENT_NODE:
            findReferencedElements(child, ids)
    return ids
|
|
|
|
# Running totals for the end-of-run report.  The removal functions below
# update them through `global` declarations.
numIDsRemoved = numElemsRemoved = numAttrsRemoved = 0
|
|
|
|
def removeUnreferencedIDs(referencedIDs, identifiedElements):
    """Remove the id attribute from every element nobody refers to.

    referencedIDs      -- container of the ids that are referenced; only
                          membership (``in``) is used
    identifiedElements -- dict mapping id -> element carrying that id

    Returns the number of id attributes removed by this call, and adds
    the same amount to the global numIDsRemoved.  identifiedElements is
    not modified; the script rebuilds it before each call.
    """
    global numIDsRemoved
    num = 0
    for elemId, node in identifiedElements.items():
        if elemId not in referencedIDs:
            node.removeAttribute('id')
            numIDsRemoved += 1
            num += 1
    return num
|
|
|
|
def vacuumDefs(doc):
    """Remove id-less element children from every <defs> element.

    Looks at each SVG-namespace ``defs`` element below the document
    element and removes those of its direct child elements whose ``id``
    attribute is missing or empty.

    doc -- the DOM document to clean

    Returns the number of elements removed by this call, and adds the
    same amount to the global numElemsRemoved.
    """
    global numElemsRemoved
    num = 0
    defs = doc.documentElement.getElementsByTagNameNS(NS['SVG'], 'defs')
    for aDef in defs:
        # Iterate over a snapshot: removeChild() edits aDef.childNodes in
        # place, and looping over the live list would skip the sibling
        # that follows each removed element.
        for elem in list(aDef.childNodes):
            if elem.nodeType == elem.ELEMENT_NODE and elem.getAttribute('id') == '':
                aDef.removeChild(elem)
                numElemsRemoved += 1
                num += 1
    return num
|
|
|
|
def removeNamespacedAttributes(node, namespaces):
    """Remove every attribute that belongs to one of the given namespaces.

    Processes ``node`` and, recursively, all of its descendants.

    node       -- the DOM node to start from; non-element nodes are
                  left untouched
    namespaces -- container of namespace URIs whose attributes are removed

    Returns the number of attributes removed in the whole subtree, and
    adds the same amount to the global numAttrsRemoved.
    """
    global numAttrsRemoved
    num = 0
    if node.nodeType == node.ELEMENT_NODE:
        # Take a snapshot of the attributes before removing anything:
        # removing while indexing into the live map shifts the remaining
        # entries, so some attributes would be skipped.
        attrList = node.attributes
        snapshot = [attrList.item(i) for i in range(attrList.length)]
        for attr in snapshot:
            if attr is not None and attr.namespaceURI in namespaces:
                node.removeAttribute(attr.nodeName)
                num += 1
                numAttrsRemoved += 1

        # now recurse for children, adding their removals to the total
        for child in node.childNodes:
            num += removeNamespacedAttributes(child, namespaces)
    return num
|
|
|
|
def removeNamespacedElements(node, namespaces):
    """Remove every child node that belongs to one of the given namespaces.

    Processes ``node`` and, recursively, all of its remaining descendants.
    A removed node is dropped together with its whole subtree.

    node       -- the DOM node to start from; non-element nodes are
                  left untouched
    namespaces -- container of namespace URIs whose nodes are removed

    Returns the number of nodes removed in the whole subtree (a removed
    node counts once, its descendants are not counted), and adds the same
    amount to the global numElemsRemoved.
    """
    global numElemsRemoved
    num = 0
    if node.nodeType == node.ELEMENT_NODE:
        # Iterate over a snapshot: removeChild() edits node.childNodes in
        # place, and looping over the live list would skip the sibling
        # that follows each removed child.
        for child in list(node.childNodes):
            if child.namespaceURI in namespaces:
                node.removeChild(child)
                num += 1
                numElemsRemoved += 1

        # now recurse for the surviving children, adding their removals
        for child in node.childNodes:
            num += removeNamespacedElements(child, namespaces)
    return num
|
|
|
|
def repairStyle(node):
    """Normalise the style attribute of ``node`` and all its descendants.

    For every element the ``style`` attribute is parsed into
    ``property:value`` declarations, repaired, and written back as
    ``prop:value;prop:value;`` in the order the properties first appeared
    (a repeated property keeps its last value).  The one repair applied
    is to ``fill`` and ``stroke`` values of the form
    ``url(#someId) rgb(0, 0, 0)``, which are reduced to ``url(#someId)``.

    Elements that end up with no declarations get no ``style`` attribute
    at all; an existing empty one is removed.

    node -- the DOM node to start from

    Returns the number of fill/stroke values repaired in the subtree.
    """
    num = 0

    if node.nodeType == node.ELEMENT_NODE:
        # get all style properties and stuff them into a dictionary,
        # remembering the order in which the properties first appeared
        styleMap = {}
        propOrder = []
        for declaration in node.getAttribute('style').split(';'):
            # split on the first ':' only, so a value that itself contains
            # a colon is kept instead of being dropped
            prop, sep, val = declaration.partition(':')
            if not sep:
                # no colon at all (e.g. the empty piece after a final ';')
                continue
            prop = prop.strip()
            if prop not in styleMap:
                propOrder.append(prop)
            styleMap[prop] = val.strip()

        # I've seen this enough to know that I need to correct it:
        # fill: url(#linearGradient4918) rgb(0, 0, 0);
        for prop in ('fill', 'stroke'):
            if prop in styleMap:
                chunk = styleMap[prop].split(') ')
                if (len(chunk) == 2 and chunk[0][:5] == 'url(#'
                        and chunk[1] == 'rgb(0, 0, 0)'):
                    # split() consumed the ')' that closes url(...)
                    styleMap[prop] = chunk[0] + ')'
                    num += 1

        # sew our style back together
        fixedStyle = ''.join([prop + ':' + styleMap[prop] + ';'
                              for prop in propOrder])
        if fixedStyle != '':
            node.setAttribute('style', fixedStyle)
        elif node.hasAttribute('style'):
            # do not leave (or create) a useless style="" attribute
            node.removeAttribute('style')

    for child in node.childNodes:
        num += repairStyle(child)

    return num
|
|
|
|
# Editor-specific (Inkscape / Sodipodi) markup that is stripped from the
# document.
unwantedNamespaces = [NS['SODIPODI'], NS['INKSCAPE']]

try:
    # A single pass of the removal helpers is not relied upon to catch
    # everything (removing nodes while a child list is being walked can
    # skip siblings), so each helper is repeated until it reports that it
    # removed nothing.
    while removeNamespacedElements(doc.documentElement, unwantedNamespaces) > 0:
        pass

    while removeNamespacedAttributes(doc.documentElement, unwantedNamespaces) > 0:
        pass

    # Dropping an unreferenced id can make an element in <defs> removable,
    # and removing that element can in turn leave other ids unreferenced,
    # so repeat both steps until neither changes anything.
    bContinueLooping = True
    while bContinueLooping:
        identifiedElements = findElementsWithId(doc.documentElement, {})
        referencedIDs = findReferencedElements(doc.documentElement, {})
        numChanged = (removeUnreferencedIDs(referencedIDs, identifiedElements)
                      + vacuumDefs(doc))
        bContinueLooping = numChanged > 0

    numStylePropsFixed = repairStyle(doc.documentElement)

    # output the document
    doc.documentElement.writexml(output)
finally:
    # Close input and output files, even when processing failed part-way
    input.close()
    output.close()

# output some statistics if we are not using stdout
if bOutputReport:
    # each line is built as a single string so that it prints identically
    # under Python 2 and Python 3
    print("Number of unreferenced id attributes removed: " + str(numIDsRemoved))
    print("Number of elements removed: " + str(numElemsRemoved))
    print("Number of attributes removed: " + str(numAttrsRemoved))
    print("Number of style properties fixed: " + str(numStylePropsFixed))
|