From 77aadca98a120cf64609732c1abbd088f84124eb Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 24 Aug 2017 20:52:07 +0000 Subject: [PATCH 1/3] scour.py: Use named constants rather than literal integers for `nodeType' --- scour/scour.py | 67 +++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/scour/scour.py b/scour/scour.py index 11791c2..1dc0425 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -56,6 +56,7 @@ import re import sys import time import xml.dom.minidom +from xml.dom import Node from collections import namedtuple from decimal import Context, Decimal, InvalidOperation, getcontext @@ -540,7 +541,7 @@ def findElementsWithId(node, elems=None): for child in node.childNodes: # from http://www.w3.org/TR/DOM-Level-2-Core/idl-definitions.html # we are only really interested in nodes of type Element (1) - if child.nodeType == 1: + if child.nodeType == Node.ELEMENT_NODE: findElementsWithId(child, elems) return elems @@ -604,7 +605,7 @@ def findReferencedElements(node, ids=None): if node.hasChildNodes(): for child in node.childNodes: - if child.nodeType == 1: + if child.nodeType == Node.ELEMENT_NODE: findReferencedElements(child, ids) return ids @@ -645,7 +646,7 @@ def removeUnusedDefs(doc, defElem, elemsToRemove=None): keepTags = ['font', 'style', 'metadata', 'script', 'title', 'desc'] for elem in defElem.childNodes: # only look at it if an element and not referenced anywhere else - if elem.nodeType == 1 and (elem.getAttribute('id') == '' or + if elem.nodeType == Node.ELEMENT_NODE and (elem.getAttribute('id') == '' or elem.getAttribute('id') not in referencedIDs): # we only inspect the children of a group in a defs if the group # is not referenced anywhere else @@ -879,7 +880,7 @@ def removeUnreferencedIDs(referencedIDs, identifiedElements): def removeNamespacedAttributes(node, namespaces): global _num_attributes_removed num = 0 - if node.nodeType == 1: + if node.nodeType == Node.ELEMENT_NODE: # remove all namespace'd attributes from this element attrList = node.attributes attrsToRemove = [] @@ -901,7 +902,7 @@ def removeNamespacedAttributes(node, namespaces): def removeNamespacedElements(node, namespaces): global _num_elements_removed num = 0 - if node.nodeType == 1: + if node.nodeType == Node.ELEMENT_NODE: # remove all namespace'd child nodes from this element childList = node.childNodes childrenToRemove = [] @@ -959,12 +960,12 @@ def removeNestedGroups(node): groupsToRemove = [] # Only consider elements for promotion if this element isn't a . # (partial fix for bug 594930, required by the SVG spec however) - if not (node.nodeType == 1 and node.nodeName == 'switch'): + if not (node.nodeType == Node.ELEMENT_NODE and node.nodeName == 'switch'): for child in node.childNodes: if child.nodeName == 'g' and child.namespaceURI == NS['SVG'] and len(child.attributes) == 0: # only collapse group if it does not have a title or desc as a direct descendant, for grandchild in child.childNodes: - if grandchild.nodeType == 1 and grandchild.namespaceURI == NS['SVG'] and \ + if grandchild.nodeType == Node.ELEMENT_NODE and grandchild.namespaceURI == NS['SVG'] and \ grandchild.nodeName in ['title', 'desc']: break else: @@ -979,7 +980,7 @@ def removeNestedGroups(node): # now recurse for children for child in node.childNodes: - if child.nodeType == 1: + if child.nodeType == Node.ELEMENT_NODE: num += removeNestedGroups(child) return num @@ -997,14 +998,14 @@ def moveCommonAttributesToParentGroup(elem, referencedElements): childElements = [] # recurse first into the children (depth-first) for child in elem.childNodes: - if child.nodeType == 1: + if child.nodeType == Node.ELEMENT_NODE: # only add and recurse if the child is not referenced elsewhere if not child.getAttribute('id') in referencedElements: childElements.append(child) num += moveCommonAttributesToParentGroup(child, referencedElements) # else if the parent has non-whitespace text children, do not # try to move common attributes - elif child.nodeType == 3 and child.nodeValue.strip(): + elif child.nodeType == Node.TEXT_NODE and child.nodeValue.strip(): return num # only process the children if there are more than one element @@ -1102,7 +1103,7 @@ def createGroupsForCommonAttributes(elem): while curChild >= 0: childNode = elem.childNodes.item(curChild) - if childNode.nodeType == 1 and childNode.getAttribute(curAttr) != '' and childNode.nodeName in [ + if childNode.nodeType == Node.ELEMENT_NODE and childNode.getAttribute(curAttr) != '' and childNode.nodeName in [ # only attempt to group elements that the content model allows to be children of a # SVG 1.1 (see https://www.w3.org/TR/SVG/struct.html#GElement) @@ -1130,7 +1131,7 @@ def createGroupsForCommonAttributes(elem): # attribute value, preserving any nodes in-between. while runStart > 0: nextNode = elem.childNodes.item(runStart - 1) - if nextNode.nodeType == 1: + if nextNode.nodeType == Node.ELEMENT_NODE: if nextNode.getAttribute(curAttr) != value: break else: @@ -1142,7 +1143,7 @@ def createGroupsForCommonAttributes(elem): if runElements >= 3: # Include whitespace/comment/etc. nodes in the run. while runEnd < elem.childNodes.length - 1: - if elem.childNodes.item(runEnd + 1).nodeType == 1: + if elem.childNodes.item(runEnd + 1).nodeType == Node.ELEMENT_NODE: break else: runEnd += 1 @@ -1186,7 +1187,7 @@ def createGroupsForCommonAttributes(elem): # each child gets the same treatment, recursively for childNode in elem.childNodes: - if childNode.nodeType == 1: + if childNode.nodeType == Node.ELEMENT_NODE: num += createGroupsForCommonAttributes(childNode) return num @@ -1202,7 +1203,7 @@ def removeUnusedAttributesOnParent(elem): childElements = [] # recurse first into the children (depth-first) for child in elem.childNodes: - if child.nodeType == 1: + if child.nodeType == Node.ELEMENT_NODE: childElements.append(child) num += removeUnusedAttributesOnParent(child) @@ -1302,11 +1303,11 @@ def collapseSinglyReferencedGradients(doc): # (Cyn: I've seen documents with #id references but no element with that ID!) if count == 1 and rid in identifiedElements: elem = identifiedElements[rid] - if elem is not None and elem.nodeType == 1 and elem.nodeName in ['linearGradient', 'radialGradient'] \ + if elem is not None and elem.nodeType == Node.ELEMENT_NODE and elem.nodeName in ['linearGradient', 'radialGradient'] \ and elem.namespaceURI == NS['SVG']: # found a gradient that is referenced by only 1 other element refElem = nodes[0] - if refElem.nodeType == 1 and refElem.nodeName in ['linearGradient', 'radialGradient'] \ + if refElem.nodeType == Node.ELEMENT_NODE and refElem.nodeName in ['linearGradient', 'radialGradient'] \ and refElem.namespaceURI == NS['SVG']: # elem is a gradient referenced by only one other gradient (refElem) @@ -1448,7 +1449,7 @@ def removeDuplicateGradients(doc): def _getStyle(node): u"""Returns the style attribute of a node as a dictionary.""" - if node.nodeType == 1 and len(node.getAttribute('style')) > 0: + if node.nodeType == Node.ELEMENT_NODE and len(node.getAttribute('style')) > 0: styleMap = {} rawStyles = node.getAttribute('style').split(';') for style in rawStyles: @@ -1614,7 +1615,7 @@ def styleInheritedFromParent(node, style): parentNode = node.parentNode # return None if we reached the Document element - if parentNode.nodeType == 9: + if parentNode.nodeType == Node.DOCUMENT_NODE: return None # check styles first (they take precedence over presentation attributes) @@ -1647,7 +1648,7 @@ def styleInheritedByChild(node, style, nodeIsChild=False): any style sheets are ignored! """ # Comment, text and CDATA nodes don't have attributes and aren't containers so they can't inherit attributes - if node.nodeType != 1: + if node.nodeType != Node.ELEMENT_NODE: return False if nodeIsChild: @@ -1702,7 +1703,7 @@ def mayContainTextNodes(node): result = True # Default value # Comment, text and CDATA nodes don't have attributes and aren't containers - if node.nodeType != 1: + if node.nodeType != Node.ELEMENT_NODE: result = False # Non-SVG elements? Unknown elements! elif node.namespaceURI != NS['SVG']: @@ -1920,7 +1921,7 @@ def removeDefaultAttributeValues(node, options, tainted=set()): For such attributes, we don't delete attributes with default values.""" num = 0 - if node.nodeType != 1: + if node.nodeType != Node.ELEMENT_NODE: return 0 # Conditionally remove all default attributes defined in 'default_attributes' (a list of 'DefaultAttribute's) @@ -1997,7 +1998,7 @@ def convertColors(element): """ numBytes = 0 - if element.nodeType != 1: + if element.nodeType != Node.ELEMENT_NODE: return 0 # set up list of color attributes for each element type @@ -2772,7 +2773,7 @@ def reducePrecision(element): _setStyle(element, styles) for child in element.childNodes: - if child.nodeType == 1: + if child.nodeType == Node.ELEMENT_NODE: num += reducePrecision(child) return num @@ -2989,7 +2990,7 @@ def optimizeTransforms(element, options): num += len(val) - len(newVal) for child in element.childNodes: - if child.nodeType == 1: + if child.nodeType == Node.ELEMENT_NODE: num += optimizeTransforms(child, options) return num @@ -3133,7 +3134,7 @@ def properlySizeDoc(docElement, options): def remapNamespacePrefix(node, oldprefix, newprefix): - if node is None or node.nodeType != 1: + if node is None or node.nodeType != Node.ELEMENT_NODE: return if node.prefix == oldprefix: @@ -3281,14 +3282,14 @@ def serializeXML(element, options, ind=0, preserveWhitespace=False): onNewLine = False for child in element.childNodes: # element node - if child.nodeType == 1: + if child.nodeType == Node.ELEMENT_NODE: if preserveWhitespace: outParts.append(serializeXML(child, options, 0, preserveWhitespace)) else: outParts.extend([newline, serializeXML(child, options, indent + 1, preserveWhitespace)]) onNewLine = True # text node - elif child.nodeType == 3: + elif child.nodeType == Node.TEXT_NODE: # trim it only in the case of not being a child of an element # where whitespace might be important if preserveWhitespace: @@ -3296,10 +3297,10 @@ def serializeXML(element, options, ind=0, preserveWhitespace=False): else: outParts.append(makeWellFormed(child.nodeValue.strip())) # CDATA node - elif child.nodeType == 4: + elif child.nodeType == Node.CDATA_SECTION_NODE: outParts.extend(['']) # Comment node - elif child.nodeType == 8: + elif child.nodeType == Node.COMMENT_NODE: outParts.extend(['']) # TODO: entities, processing instructions, what else? else: # ignore the rest @@ -3468,9 +3469,9 @@ def scourString(in_string, options=None): removeElem = not elem.hasChildNodes() if removeElem is False: for child in elem.childNodes: - if child.nodeType in [1, 4, 8]: + if child.nodeType in [Node.ELEMENT_NODE, Node.CDATA_SECTION_NODE, Node.COMMENT_NODE]: break - elif child.nodeType == 3 and not child.nodeValue.isspace(): + elif child.nodeType == Node.TEXT_NODE and not child.nodeValue.isspace(): break else: removeElem = True @@ -3594,7 +3595,7 @@ def scourString(in_string, options=None): total_output = "" for child in doc.childNodes: - if child.nodeType == 1: + if child.nodeType == Node.ELEMENT_NODE: total_output += "".join(lines) else: # doctypes, entities, comments total_output += child.toxml() + '\n' From fdbf890ba8795dd0c6388ce1dcdc91022fbeb6dc Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Fri, 25 Aug 2017 04:56:47 +0000 Subject: [PATCH 2/3] scour.py: Satisfy the identing rules of PEP8 --- scour/scour.py | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) diff --git a/scour/scour.py b/scour/scour.py index 1dc0425..fe468f0 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -647,7 +647,7 @@ def removeUnusedDefs(doc, defElem, elemsToRemove=None): for elem in defElem.childNodes: # only look at it if an element and not referenced anywhere else if elem.nodeType == Node.ELEMENT_NODE and (elem.getAttribute('id') == '' or - elem.getAttribute('id') not in referencedIDs): + elem.getAttribute('id') not in referencedIDs): # we only inspect the children of a group in a defs if the group # is not referenced anywhere else if elem.nodeName == 'g' and elem.namespaceURI == NS['SVG']: @@ -1103,23 +1103,27 @@ def createGroupsForCommonAttributes(elem): while curChild >= 0: childNode = elem.childNodes.item(curChild) - if childNode.nodeType == Node.ELEMENT_NODE and childNode.getAttribute(curAttr) != '' and childNode.nodeName in [ - # only attempt to group elements that the content model allows to be children of a + if ( + childNode.nodeType == Node.ELEMENT_NODE and + childNode.getAttribute(curAttr) != '' and + childNode.nodeName in [ + # only attempt to group elements that the content model allows to be children of a - # SVG 1.1 (see https://www.w3.org/TR/SVG/struct.html#GElement) - 'animate', 'animateColor', 'animateMotion', 'animateTransform', 'set', # animation elements - 'desc', 'metadata', 'title', # descriptive elements - 'circle', 'ellipse', 'line', 'path', 'polygon', 'polyline', 'rect', # shape elements - 'defs', 'g', 'svg', 'symbol', 'use', # structural elements - 'linearGradient', 'radialGradient', # gradient elements - 'a', 'altGlyphDef', 'clipPath', 'color-profile', 'cursor', 'filter', - 'font', 'font-face', 'foreignObject', 'image', 'marker', 'mask', - 'pattern', 'script', 'style', 'switch', 'text', 'view', + # SVG 1.1 (see https://www.w3.org/TR/SVG/struct.html#GElement) + 'animate', 'animateColor', 'animateMotion', 'animateTransform', 'set', # animation elements + 'desc', 'metadata', 'title', # descriptive elements + 'circle', 'ellipse', 'line', 'path', 'polygon', 'polyline', 'rect', # shape elements + 'defs', 'g', 'svg', 'symbol', 'use', # structural elements + 'linearGradient', 'radialGradient', # gradient elements + 'a', 'altGlyphDef', 'clipPath', 'color-profile', 'cursor', 'filter', + 'font', 'font-face', 'foreignObject', 'image', 'marker', 'mask', + 'pattern', 'script', 'style', 'switch', 'text', 'view', - # SVG 1.2 (see https://www.w3.org/TR/SVGTiny12/elementTable.html) - 'animation', 'audio', 'discard', 'handler', 'listener', - 'prefetch', 'solidColor', 'textArea', 'video' - ]: + # SVG 1.2 (see https://www.w3.org/TR/SVGTiny12/elementTable.html) + 'animation', 'audio', 'discard', 'handler', 'listener', + 'prefetch', 'solidColor', 'textArea', 'video' + ] + ): # We're in a possible run! Track the value and run length. value = childNode.getAttribute(curAttr) runStart, runEnd = curChild, curChild @@ -1303,8 +1307,12 @@ def collapseSinglyReferencedGradients(doc): # (Cyn: I've seen documents with #id references but no element with that ID!) if count == 1 and rid in identifiedElements: elem = identifiedElements[rid] - if elem is not None and elem.nodeType == Node.ELEMENT_NODE and elem.nodeName in ['linearGradient', 'radialGradient'] \ - and elem.namespaceURI == NS['SVG']: + if ( + elem is not None and + elem.nodeType == Node.ELEMENT_NODE and + elem.nodeName in ['linearGradient', 'radialGradient'] and + elem.namespaceURI == NS['SVG'] + ): # found a gradient that is referenced by only 1 other element refElem = nodes[0] if refElem.nodeType == Node.ELEMENT_NODE and refElem.nodeName in ['linearGradient', 'radialGradient'] \ From 942762b5cd6ebaceb68195f8eda7ec2911c6198e Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 24 Aug 2017 18:41:04 +0000 Subject: [PATCH 3/3] scour.py: minor rearrangement for the sake of clarity There has been a minor rearrangement of the code that handles the children of the element being serialized: The relevant `if' statement has had its condition effectively negated and thus has also had its consequent and alternative swapped; now, there is a very short consequent, followed by a very long alternative, rather than a very long consequent followed by a very short alternative. --- scour/scour.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/scour/scour.py b/scour/scour.py index fe468f0..4a13125 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -3282,9 +3282,12 @@ def serializeXML(element, options, ind=0, preserveWhitespace=False): elif attrValue == 'default': preserveWhitespace = False - # if no children, self-close children = element.childNodes - if children.length > 0: + if children.length == 0: + outParts.append('/>') + if indent > 0: + outParts.append(newline) + else: outParts.append('>') onNewLine = False @@ -3319,10 +3322,6 @@ def serializeXML(element, options, ind=0, preserveWhitespace=False): outParts.extend(['']) if indent > 0: outParts.append(newline) - else: - outParts.append('/>') - if indent > 0: - outParts.append(newline) return "".join(outParts)