From aa9796ea87c90d081d1c2892660547fe7e01c807 Mon Sep 17 00:00:00 2001 From: Niels Thykier Date: Sun, 17 May 2020 19:04:53 +0000 Subject: [PATCH 1/9] Refactor: Create a g_tag_is_unmergeable Both `mergeSiblingGroupsWithCommonAttributes` and `removeNestedGroups` used the same code in different forms. Extract it into its own function. Signed-off-by: Niels Thykier --- scour/scour.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/scour/scour.py b/scour/scour.py index c987bb0..63493bd 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -1022,6 +1022,15 @@ def removeDescriptiveElements(doc, options): return num +def g_tag_is_unmergeable(node): + """Check if a tag can be merged or not + + tags with a title or descriptions should generally be left alone. + """ + return any(True for n in node.childNodes if n.nodeType == Node.ELEMENT_NODE + and n.nodeName in ('title', 'desc') and n.namespaceURI == NS['SVG']) + + def removeNestedGroups(node): """ This walks further and further down the tree, removing groups @@ -1038,11 +1047,7 @@ def removeNestedGroups(node): for child in node.childNodes: if child.nodeName == 'g' and child.namespaceURI == NS['SVG'] and len(child.attributes) == 0: # only collapse group if it does not have a title or desc as a direct descendant, - for grandchild in child.childNodes: - if grandchild.nodeType == Node.ELEMENT_NODE and grandchild.namespaceURI == NS['SVG'] and \ - grandchild.nodeName in ['title', 'desc']: - break - else: + if not g_tag_is_unmergeable(child): groupsToRemove.append(child) for g in groupsToRemove: @@ -1169,11 +1174,7 @@ def mergeSiblingGroupsWithCommonAttributes(elem): if nextNode.nodeName != 'g' or nextNode.namespaceURI != NS['SVG']: break nextAttributes = {a.nodeName: a.nodeValue for a in nextNode.attributes.values()} - hasNoMergeTags = (True for n in nextNode.childNodes - if n.nodeType == Node.ELEMENT_NODE - and n.nodeName in ('title', 'desc') - and n.namespaceURI == NS['SVG']) - if attributes != nextAttributes or any(hasNoMergeTags): + if attributes != nextAttributes or g_tag_is_unmergeable(nextNode): break else: runElements += 1 From 7b9c4ee93508d08042cf4b910bcdcc47c71645ab Mon Sep 17 00:00:00 2001 From: Niels Thykier Date: Sun, 10 Jan 2021 11:35:06 +0000 Subject: [PATCH 2/9] Simplif loop logic Signed-off-by: Niels Thykier --- scour/scour.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/scour/scour.py b/scour/scour.py index 63493bd..70cba9b 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -1381,12 +1381,11 @@ def removeUnusedAttributesOnParent(elem): unusedAttrs[attr.nodeName] = attr.nodeValue # for each child, if at least one child inherits the parent's attribute, then remove - for childNum in range(len(childElements)): - child = childElements[childNum] + for child in childElements: inheritedAttrs = [] for name in unusedAttrs: val = child.getAttribute(name) - if val == '' or val is None or val == 'inherit': + if val == '' or val == 'inherit': inheritedAttrs.append(name) for a in inheritedAttrs: del unusedAttrs[a] From a7a16799a21f124c4041d65c7dcd01f4a4c672ed Mon Sep 17 00:00:00 2001 From: Niels Thykier Date: Sun, 10 Jan 2021 13:54:52 +0000 Subject: [PATCH 3/9] Remove some dead assignments Signed-off-by: Niels Thykier --- scour/scour.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scour/scour.py b/scour/scour.py index 70cba9b..fe6fcd2 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -3401,7 +3401,6 @@ def properlySizeDoc(docElement, options): # parse viewBox attribute vbSep = RE_COMMA_WSP.split(docElement.getAttribute('viewBox')) # if we have a valid viewBox we need to check it - vbWidth, vbHeight = 0, 0 if len(vbSep) == 4: try: # if x or y are specified and non-zero then it is not ok to overwrite it @@ -3436,7 +3435,6 @@ def remapNamespacePrefix(node, oldprefix, newprefix): parent = node.parentNode # create a replacement node - newNode = None if newprefix != '': newNode = doc.createElementNS(namespace, newprefix + ":" + localName) else: From 68c1e545dae30e7ab84837f4e3d267026485ea3d Mon Sep 17 00:00:00 2001 From: Niels Thykier Date: Sun, 10 Jan 2021 13:52:26 +0000 Subject: [PATCH 4/9] Replace global stats vars with a ScourStats object This enables us to get rid of all the global variables. I used the opportunity to update function names where call sites where affected to move scour a step towards a more pythonic style in general. Signed-off-by: Niels Thykier --- scour/scour.py | 260 +++++++++++++++++++++---------------------------- scour/stats.py | 28 ++++++ 2 files changed, 137 insertions(+), 151 deletions(-) create mode 100644 scour/stats.py diff --git a/scour/scour.py b/scour/scour.py index fe6fcd2..6a69d61 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -63,6 +63,7 @@ from decimal import Context, Decimal, InvalidOperation, getcontext import six from six.moves import range, urllib +from scour.stats import ScourStats from scour.svg_regex import svg_parser from scour.svg_transform import svg_transform_parser from scour.yocto_css import parseCssString @@ -666,14 +667,13 @@ def removeUnusedDefs(doc, defElem, elemsToRemove=None, referencedIDs=None): return elemsToRemove -def removeUnreferencedElements(doc, keepDefs): +def remove_unreferenced_elements(doc, keepDefs, stats): """ Removes all unreferenced elements except for , , , , and . Also vacuums the defs of any non-referenced renderable elements. Returns the number of unreferenced elements removed from the document. """ - global _num_elements_removed num = 0 # Remove certain unreferenced elements outside of defs @@ -688,8 +688,8 @@ def removeUnreferencedElements(doc, keepDefs): elemsToRemove = removeUnusedDefs(doc, aDef, referencedIDs=referencedIDs) for elem in elemsToRemove: elem.parentNode.removeChild(elem) - _num_elements_removed += 1 - num += 1 + stats.num_elements_removed += len(elemsToRemove) + num += len(elemsToRemove) for id in identifiedElements: if id not in referencedIDs: @@ -699,7 +699,7 @@ def removeUnreferencedElements(doc, keepDefs): and goner.parentNode.tagName != 'defs'): goner.parentNode.removeChild(goner) num += 1 - _num_elements_removed += 1 + stats.num_elements_removed += 1 return num @@ -937,20 +937,18 @@ def unprotected_ids(doc, options): return identifiedElements -def removeUnreferencedIDs(referencedIDs, identifiedElements): +def remove_unreferenced_ids(referencedIDs, identifiedElements): """ Removes the unreferenced ID attributes. Returns the number of ID attributes removed """ - global _num_ids_removed keepTags = ['font'] num = 0 for id in identifiedElements: node = identifiedElements[id] if id not in referencedIDs and node.nodeName not in keepTags: node.removeAttribute('id') - _num_ids_removed += 1 num += 1 return num @@ -994,7 +992,7 @@ def removeNamespacedElements(node, namespaces): return num -def removeDescriptiveElements(doc, options): +def remove_descriptive_elements(doc, options): elementTypes = [] if options.remove_descriptive_elements: elementTypes.extend(("title", "desc", "metadata")) @@ -1006,20 +1004,16 @@ def removeDescriptiveElements(doc, options): if options.remove_metadata: elementTypes.append("metadata") if not elementTypes: - return + return 0 - global _num_elements_removed - num = 0 elementsToRemove = [] for elementType in elementTypes: elementsToRemove.extend(doc.documentElement.getElementsByTagName(elementType)) for element in elementsToRemove: element.parentNode.removeChild(element) - num += 1 - _num_elements_removed += 1 - return num + return len(elementsToRemove) def g_tag_is_unmergeable(node): @@ -1031,13 +1025,12 @@ def g_tag_is_unmergeable(node): and n.nodeName in ('title', 'desc') and n.namespaceURI == NS['SVG']) -def removeNestedGroups(node): +def remove_nested_groups(node, stats): """ This walks further and further down the tree, removing groups which do not have any attributes or a title/desc child and promoting their children up one level """ - global _num_elements_removed num = 0 groupsToRemove = [] @@ -1054,13 +1047,14 @@ def removeNestedGroups(node): while g.childNodes.length > 0: g.parentNode.insertBefore(g.firstChild, g) g.parentNode.removeChild(g) - _num_elements_removed += 1 - num += 1 + + num += len(groupsToRemove) + stats.num_elements_removed += len(groupsToRemove) # now recurse for children for child in node.childNodes: if child.nodeType == Node.ELEMENT_NODE: - num += removeNestedGroups(child) + num += remove_nested_groups(child, stats) return num @@ -1215,7 +1209,7 @@ def mergeSiblingGroupsWithCommonAttributes(elem): return num -def createGroupsForCommonAttributes(elem): +def create_groups_for_common_attributes(elem, stats): """ Creates elements to contain runs of 3 or more consecutive child elements having at least one common attribute. @@ -1227,8 +1221,6 @@ def createGroupsForCommonAttributes(elem): This function acts recursively on the given element. """ - num = 0 - global _num_elements_removed # TODO perhaps all of the Presentation attributes in http://www.w3.org/TR/SVG/struct.html#GElement # could be added here @@ -1328,9 +1320,8 @@ def createGroupsForCommonAttributes(elem): # Include the group in elem's children. elem.childNodes.insert(runStart, group) group.parentNode = elem - num += 1 curChild = runStart - 1 - _num_elements_removed -= 1 + stats.num_elements_removed -= 1 else: curChild -= 1 else: @@ -1339,9 +1330,7 @@ def createGroupsForCommonAttributes(elem): # each child gets the same treatment, recursively for childNode in elem.childNodes: if childNode.nodeType == Node.ELEMENT_NODE: - num += createGroupsForCommonAttributes(childNode) - - return num + create_groups_for_common_attributes(childNode, stats) def removeUnusedAttributesOnParent(elem): @@ -1398,8 +1387,7 @@ def removeUnusedAttributesOnParent(elem): return num -def removeDuplicateGradientStops(doc): - global _num_elements_removed +def remove_duplicate_gradient_stops(doc, stats): num = 0 for gradType in ['linearGradient', 'radialGradient']: @@ -1432,15 +1420,13 @@ def removeDuplicateGradientStops(doc): for stop in stopsToRemove: stop.parentNode.removeChild(stop) - num += 1 - _num_elements_removed += 1 + num += len(stopsToRemove) + stats.num_elements_removed += len(stopsToRemove) - # linear gradients return num -def collapseSinglyReferencedGradients(doc): - global _num_elements_removed +def collapse_singly_referenced_gradients(doc, stats): num = 0 identifiedElements = findElementsWithId(doc.documentElement) @@ -1502,8 +1488,9 @@ def collapseSinglyReferencedGradients(doc): # now delete elem elem.parentNode.removeChild(elem) - _num_elements_removed += 1 + stats.num_elements_removed += 1 num += 1 + return num @@ -2277,12 +2264,10 @@ def convertColors(element): # reusing data structures, etc -def cleanPath(element, options): +def clean_path(element, options, stats): """ Cleans the path string (d attribute) of the element """ - global _num_bytes_saved_in_path_data - global _num_path_segments_removed # this gets the parser object from svg_regex.py oldPathStr = element.getAttribute('d') @@ -2433,34 +2418,34 @@ def cleanPath(element, options): while i < len(data): if data[i] == data[i + 1] == 0: del data[i:i + 2] - _num_path_segments_removed += 1 + stats.num_path_segments_removed += 1 else: i += 2 elif cmd == 'c': while i < len(data): if data[i] == data[i + 1] == data[i + 2] == data[i + 3] == data[i + 4] == data[i + 5] == 0: del data[i:i + 6] - _num_path_segments_removed += 1 + stats.num_path_segments_removed += 1 else: i += 6 elif cmd == 'a': while i < len(data): if data[i + 5] == data[i + 6] == 0: del data[i:i + 7] - _num_path_segments_removed += 1 + stats.num_path_segments_removed += 1 else: i += 7 elif cmd == 'q': while i < len(data): if data[i] == data[i + 1] == data[i + 2] == data[i + 3] == 0: del data[i:i + 4] - _num_path_segments_removed += 1 + stats.num_path_segments_removed += 1 else: i += 4 elif cmd in ['h', 'v']: oldLen = len(data) path[pathIndex] = (cmd, [coord for coord in data if coord != 0]) - _num_path_segments_removed += len(path[pathIndex][1]) - oldLen + stats.num_path_segments_removed += len(path[pathIndex][1]) - oldLen # remove no-op commands pathIndex = len(path) @@ -2481,7 +2466,7 @@ def cleanPath(element, options): # continue a draw on the same subpath after a # "z"). del path[pathIndex] - _num_path_segments_removed += 1 + stats.num_path_segments_removed += 1 else: # it is not safe to rewrite "m0 0 ..." to "l..." # because of this "z" command. @@ -2491,7 +2476,7 @@ def cleanPath(element, options): # Ends with an empty move (but no line/draw # following it) del path[pathIndex] - _num_path_segments_removed += 1 + stats.num_path_segments_removed += 1 continue if subpath_needs_anchor: subpath_needs_anchor = False @@ -2499,7 +2484,7 @@ def cleanPath(element, options): # unanchored, i.e. we can replace "m0 0 ..." with # "l..." as there is no "z" after it. path[pathIndex] = ('l', data[2:]) - _num_path_segments_removed += 1 + stats.num_path_segments_removed += 1 # fixup: Delete subcommands having no coordinates. path = [elem for elem in path if len(elem[1]) > 0 or elem[0] == 'z'] @@ -2587,14 +2572,14 @@ def cleanPath(element, options): lineTuples = [] # append the v and then the remaining line coords newPath.append(('v', [data[i + 1]])) - _num_path_segments_removed += 1 + stats.num_path_segments_removed += 1 elif data[i + 1] == 0: if lineTuples: # flush the line command, then append the h and then the remaining line coords newPath.append(('l', lineTuples)) lineTuples = [] newPath.append(('h', [data[i]])) - _num_path_segments_removed += 1 + stats.num_path_segments_removed += 1 else: lineTuples.extend(data[i:i + 2]) i += 2 @@ -2614,7 +2599,7 @@ def cleanPath(element, options): cmd = 'l' # dealing with linetos now # append the v and then the remaining line coords newPath.append(('v', [data[i + 1]])) - _num_path_segments_removed += 1 + stats.num_path_segments_removed += 1 elif data[i + 1] == 0: if lineTuples: # flush the m/l command, then append the h and then the remaining line coords @@ -2622,7 +2607,7 @@ def cleanPath(element, options): lineTuples = [] cmd = 'l' # dealing with linetos now newPath.append(('h', [data[i]])) - _num_path_segments_removed += 1 + stats.num_path_segments_removed += 1 else: lineTuples.extend(data[i:i + 2]) i += 2 @@ -2651,7 +2636,7 @@ def cleanPath(element, options): curveTuples = [] # append the s command newPath.append(('s', [data[i + 2], data[i + 3], data[i + 4], data[i + 5]])) - _num_path_segments_removed += 1 + stats.num_path_segments_removed += 1 else: j = 0 while j <= 5: @@ -2676,7 +2661,7 @@ def cleanPath(element, options): curveTuples = [] # append the t command newPath.append(('t', [data[i + 2], data[i + 3]])) - _num_path_segments_removed += 1 + stats.num_path_segments_removed += 1 else: j = 0 while j <= 3: @@ -2709,7 +2694,7 @@ def cleanPath(element, options): if is_same_sign(data[coordIndex], data[coordIndex+1]): data[coordIndex] += data[coordIndex+1] del data[coordIndex+1] - _num_path_segments_removed += 1 + stats.num_path_segments_removed += 1 else: coordIndex += 1 @@ -2722,7 +2707,7 @@ def cleanPath(element, options): data[coordIndex+1] += data[coordIndex+3] del data[coordIndex+2] # delete the next two elements del data[coordIndex+2] - _num_path_segments_removed += 1 + stats.num_path_segments_removed += 1 else: coordIndex += 2 @@ -2735,7 +2720,7 @@ def cleanPath(element, options): data[coordIndex+1] += data[coordIndex+3] del data[coordIndex+2] # delete the next two elements del data[coordIndex+2] - _num_path_segments_removed += 1 + stats.num_path_segments_removed += 1 else: coordIndex += 2 @@ -2770,7 +2755,7 @@ def cleanPath(element, options): # if for whatever reason we actually made the path longer don't use it # TODO: maybe we could compare path lengths after each optimization step and use the shortest if len(newPathStr) <= len(oldPathStr): - _num_bytes_saved_in_path_data += (len(oldPathStr) - len(newPathStr)) + stats.num_bytes_saved_in_path_data += (len(oldPathStr) - len(newPathStr)) element.setAttribute('d', newPathStr) @@ -2834,11 +2819,11 @@ def parseListOfPoints(s): return nums -def cleanPolygon(elem, options): +def clean_polygon(elem, options): """ Remove unnecessary closing point of polygon points attribute """ - global _num_points_removed_from_polygon + num_points_removed_from_polygon = 0 pts = parseListOfPoints(elem.getAttribute('points')) N = len(pts) / 2 @@ -2847,8 +2832,9 @@ def cleanPolygon(elem, options): (endx, endy) = pts[-2:] if startx == endx and starty == endy: del pts[-2:] - _num_points_removed_from_polygon += 1 + num_points_removed_from_polygon += 1 elem.setAttribute('points', scourCoordinates(pts, options, True)) + return num_points_removed_from_polygon def cleanPolyline(elem, options): @@ -3288,31 +3274,27 @@ def optimizeTransforms(element, options): return num -def removeComments(element): +def remove_comments(element, stats): """ Removes comments from the element and its children. """ - global _num_bytes_saved_in_comments - num = 0 if isinstance(element, xml.dom.minidom.Comment): - _num_bytes_saved_in_comments += len(element.data) + stats.num_bytes_saved_in_comments += len(element.data) + stats.num_comments_removed += 1 element.parentNode.removeChild(element) - num += 1 else: for subelement in element.childNodes[:]: - num += removeComments(subelement) - - return num + remove_comments(subelement, stats) -def embedRasters(element, options): +def embed_rasters(element, options): import base64 """ Converts raster references to inline images. NOTE: there are size limits to base64-encoding handling in browsers """ - global _num_rasters_embedded + num_rasters_embedded = 0 href = element.getAttributeNS(NS['XLINK'], 'href') @@ -3380,8 +3362,9 @@ def embedRasters(element, options): element.setAttributeNS(NS['XLINK'], 'href', 'data:image/' + ext + ';base64,' + b64eRaster.decode()) - _num_rasters_embedded += 1 + num_rasters_embedded += 1 del b64eRaster + return num_rasters_embedded def properlySizeDoc(docElement, options): @@ -3629,10 +3612,14 @@ def serializeXML(element, options, indent_depth=0, preserveWhitespace=False): # this is the main method # input is a string representation of the input XML # returns a string representation of the output XML -def scourString(in_string, options=None): +def scourString(in_string, options=None, stats=None): # sanitize options (take missing attributes from defaults, discard unknown attributes) options = sanitizeOptions(options) + if stats is None: + # This is easier than doing "if stats is not None:" checks all over the place + stats = ScourStats() + # default or invalid value if(options.cdigits < 0): options.cdigits = options.digits @@ -3645,37 +3632,6 @@ def scourString(in_string, options=None): scouringContext = Context(prec=options.digits) scouringContextC = Context(prec=options.cdigits) - # globals for tracking statistics - # TODO: get rid of these globals... - global _num_elements_removed - global _num_attributes_removed - global _num_ids_removed - global _num_comments_removed - global _num_style_properties_fixed - global _num_rasters_embedded - global _num_path_segments_removed - global _num_points_removed_from_polygon - global _num_bytes_saved_in_path_data - global _num_bytes_saved_in_colors - global _num_bytes_saved_in_comments - global _num_bytes_saved_in_ids - global _num_bytes_saved_in_lengths - global _num_bytes_saved_in_transforms - _num_elements_removed = 0 - _num_attributes_removed = 0 - _num_ids_removed = 0 - _num_comments_removed = 0 - _num_style_properties_fixed = 0 - _num_rasters_embedded = 0 - _num_path_segments_removed = 0 - _num_points_removed_from_polygon = 0 - _num_bytes_saved_in_path_data = 0 - _num_bytes_saved_in_colors = 0 - _num_bytes_saved_in_comments = 0 - _num_bytes_saved_in_ids = 0 - _num_bytes_saved_in_lengths = 0 - _num_bytes_saved_in_transforms = 0 - doc = xml.dom.minidom.parseString(in_string) # determine number of flowRoot elements in input document @@ -3690,14 +3646,14 @@ def scourString(in_string, options=None): print("WARNING: {}".format(errmsg), file=sys.stderr) # remove descriptive elements - removeDescriptiveElements(doc, options) + stats.num_elements_removed += remove_descriptive_elements(doc, options) # remove unneeded namespaced elements/attributes added by common editors if options.keep_editor_data is False: - _num_elements_removed += removeNamespacedElements(doc.documentElement, - unwanted_ns) - _num_attributes_removed += removeNamespacedAttributes(doc.documentElement, - unwanted_ns) + stats.num_elements_removed += removeNamespacedElements(doc.documentElement, + unwanted_ns) + stats.num_attributes_removed += removeNamespacedAttributes(doc.documentElement, + unwanted_ns) # remove the xmlns: declarations now xmlnsDeclsToRemove = [] @@ -3708,7 +3664,7 @@ def scourString(in_string, options=None): for attr in xmlnsDeclsToRemove: doc.documentElement.removeAttribute(attr) - _num_attributes_removed += 1 + stats.num_attributes_removed += len(xmlnsDeclsToRemove) # ensure namespace for SVG is declared # TODO: what if the default namespace is something else (i.e. some valid namespace)? @@ -3743,28 +3699,28 @@ def scourString(in_string, options=None): for attrName in xmlnsDeclsToRemove: doc.documentElement.removeAttribute(attrName) - _num_attributes_removed += 1 + stats.num_attributes_removed += len(xmlnsDeclsToRemove) for prefix in redundantPrefixes: remapNamespacePrefix(doc.documentElement, prefix, '') if options.strip_comments: - _num_comments_removed = removeComments(doc) + remove_comments(doc, stats) if options.strip_xml_space_attribute and doc.documentElement.hasAttribute('xml:space'): doc.documentElement.removeAttribute('xml:space') - _num_attributes_removed += 1 + stats.num_attributes_removed += 1 # repair style (remove unnecessary style properties and change them into XML attributes) - _num_style_properties_fixed = repairStyle(doc.documentElement, options) + stats.num_style_properties_fixed = repairStyle(doc.documentElement, options) # convert colors to #RRGGBB format if options.simple_colors: - _num_bytes_saved_in_colors = convertColors(doc.documentElement) + stats.num_bytes_saved_in_colors = convertColors(doc.documentElement) # remove unreferenced gradients/patterns outside of defs # and most unreferenced elements inside of defs - while removeUnreferencedElements(doc, options.keep_defs) > 0: + while remove_unreferenced_elements(doc, options.keep_defs, stats) > 0: pass # remove empty defs, metadata, g @@ -3782,29 +3738,30 @@ def scourString(in_string, options=None): removeElem = True if removeElem: elem.parentNode.removeChild(elem) - _num_elements_removed += 1 + stats.num_elements_removed += 1 if options.strip_ids: referencedIDs = findReferencedElements(doc.documentElement) identifiedElements = unprotected_ids(doc, options) - removeUnreferencedIDs(referencedIDs, identifiedElements) + stats.num_ids_removed += remove_unreferenced_ids(referencedIDs, + identifiedElements) - while removeDuplicateGradientStops(doc) > 0: + while remove_duplicate_gradient_stops(doc, stats) > 0: pass # remove gradients that are only referenced by one other gradient - while collapseSinglyReferencedGradients(doc) > 0: + while collapse_singly_referenced_gradients(doc, stats) > 0: pass # remove duplicate gradients - _num_elements_removed += removeDuplicateGradients(doc) + stats.num_elements_removed += removeDuplicateGradients(doc) if options.group_collapse: - _num_elements_removed += mergeSiblingGroupsWithCommonAttributes(doc.documentElement) + stats.num_elements_removed += mergeSiblingGroupsWithCommonAttributes(doc.documentElement) # create elements if there are runs of elements with the same attributes. # this MUST be before moveCommonAttributesToParentGroup. if options.group_create: - createGroupsForCommonAttributes(doc.documentElement) + create_groups_for_common_attributes(doc.documentElement, stats) # move common attributes to parent group # NOTE: the if the element's immediate children @@ -3813,20 +3770,20 @@ def scourString(in_string, options=None): # doesn't accept fill=, stroke= etc.! referencedIds = findReferencedElements(doc.documentElement) for child in doc.documentElement.childNodes: - _num_attributes_removed += moveCommonAttributesToParentGroup(child, referencedIds) + stats.num_attributes_removed += moveCommonAttributesToParentGroup(child, referencedIds) # remove unused attributes from parent - _num_attributes_removed += removeUnusedAttributesOnParent(doc.documentElement) + stats.num_attributes_removed += removeUnusedAttributesOnParent(doc.documentElement) # Collapse groups LAST, because we've created groups. If done before # moveAttributesToParentGroup, empty 's may remain. if options.group_collapse: - while removeNestedGroups(doc.documentElement) > 0: + while remove_nested_groups(doc.documentElement, stats) > 0: pass # remove unnecessary closing point of polygons and scour points for polygon in doc.documentElement.getElementsByTagName('polygon'): - cleanPolygon(polygon, options) + stats.num_points_removed_from_polygon += clean_polygon(polygon, options) # scour points of polyline for polyline in doc.documentElement.getElementsByTagName('polyline'): @@ -3837,11 +3794,11 @@ def scourString(in_string, options=None): if elem.getAttribute('d') == '': elem.parentNode.removeChild(elem) else: - cleanPath(elem, options) + clean_path(elem, options, stats) # shorten ID names as much as possible if options.shorten_ids: - _num_bytes_saved_in_ids += shortenIDs(doc, options.shorten_ids_prefix, options) + stats.num_bytes_saved_in_ids += shortenIDs(doc, options.shorten_ids_prefix, options) # scour lengths (including coordinates) for type in ['svg', 'image', 'rect', 'circle', 'ellipse', 'line', @@ -3858,18 +3815,18 @@ def scourString(in_string, options=None): doc.documentElement.setAttribute('viewBox', ' '.join(lengths)) # more length scouring in this function - _num_bytes_saved_in_lengths = reducePrecision(doc.documentElement) + stats.num_bytes_saved_in_lengths = reducePrecision(doc.documentElement) # remove default values of attributes - _num_attributes_removed += removeDefaultAttributeValues(doc.documentElement, options) + stats.num_attributes_removed += removeDefaultAttributeValues(doc.documentElement, options) # reduce the length of transformation attributes - _num_bytes_saved_in_transforms = optimizeTransforms(doc.documentElement, options) + stats.num_bytes_saved_in_transforms = optimizeTransforms(doc.documentElement, options) # convert rasters references to base64-encoded strings if options.embed_rasters: for elem in doc.documentElement.getElementsByTagName('image'): - embedRasters(elem, options) + stats.num_rasters_embedded += embed_rasters(elem, options) # properly size the SVG document (ideally width/height should be 100% with a viewBox) if options.enable_viewboxing: @@ -3903,7 +3860,7 @@ def scourString(in_string, options=None): # used mostly by unit tests # input is a filename # returns the minidom doc representation of the SVG -def scourXmlFile(filename, options=None): +def scourXmlFile(filename, options=None, stats=None): # sanitize options (take missing attributes from defaults, discard unknown attributes) options = sanitizeOptions(options) # we need to make sure infilename is set correctly (otherwise relative references in the SVG won't work) @@ -3912,7 +3869,7 @@ def scourXmlFile(filename, options=None): # open the file and scour it with open(filename, "rb") as f: in_string = f.read() - out_string = scourString(in_string, options) + out_string = scourString(in_string, options, stats=stats) # prepare the output xml.dom.minidom object doc = xml.dom.minidom.parseString(out_string.encode('utf-8')) @@ -4156,22 +4113,22 @@ def getInOut(options): return [infile, outfile] -def getReport(): +def generate_report(stats): return ( - ' Number of elements removed: ' + str(_num_elements_removed) + os.linesep + - ' Number of attributes removed: ' + str(_num_attributes_removed) + os.linesep + - ' Number of unreferenced IDs removed: ' + str(_num_ids_removed) + os.linesep + - ' Number of comments removed: ' + str(_num_comments_removed) + os.linesep + - ' Number of style properties fixed: ' + str(_num_style_properties_fixed) + os.linesep + - ' Number of raster images embedded: ' + str(_num_rasters_embedded) + os.linesep + - ' Number of path segments reduced/removed: ' + str(_num_path_segments_removed) + os.linesep + - ' Number of points removed from polygons: ' + str(_num_points_removed_from_polygon) + os.linesep + - ' Number of bytes saved in path data: ' + str(_num_bytes_saved_in_path_data) + os.linesep + - ' Number of bytes saved in colors: ' + str(_num_bytes_saved_in_colors) + os.linesep + - ' Number of bytes saved in comments: ' + str(_num_bytes_saved_in_comments) + os.linesep + - ' Number of bytes saved in IDs: ' + str(_num_bytes_saved_in_ids) + os.linesep + - ' Number of bytes saved in lengths: ' + str(_num_bytes_saved_in_lengths) + os.linesep + - ' Number of bytes saved in transformations: ' + str(_num_bytes_saved_in_transforms) + ' Number of elements removed: ' + str(stats.num_elements_removed) + os.linesep + + ' Number of attributes removed: ' + str(stats.num_attributes_removed) + os.linesep + + ' Number of unreferenced IDs removed: ' + str(stats.num_ids_removed) + os.linesep + + ' Number of comments removed: ' + str(stats.num_comments_removed) + os.linesep + + ' Number of style properties fixed: ' + str(stats.num_style_properties_fixed) + os.linesep + + ' Number of raster images embedded: ' + str(stats.num_rasters_embedded) + os.linesep + + ' Number of path segments reduced/removed: ' + str(stats.num_path_segments_removed) + os.linesep + + ' Number of points removed from polygons: ' + str(stats.num_points_removed_from_polygon) + os.linesep + + ' Number of bytes saved in path data: ' + str(stats.num_bytes_saved_in_path_data) + os.linesep + + ' Number of bytes saved in colors: ' + str(stats.num_bytes_saved_in_colors) + os.linesep + + ' Number of bytes saved in comments: ' + str(stats.num_bytes_saved_in_comments) + os.linesep + + ' Number of bytes saved in IDs: ' + str(stats.num_bytes_saved_in_ids) + os.linesep + + ' Number of bytes saved in lengths: ' + str(stats.num_bytes_saved_in_lengths) + os.linesep + + ' Number of bytes saved in transformations: ' + str(stats.num_bytes_saved_in_transforms) ) @@ -4180,10 +4137,11 @@ def start(options, input, output): options = sanitizeOptions(options) start = time.time() + stats = ScourStats() # do the work in_string = input.read() - out_string = scourString(in_string, options).encode("UTF-8") + out_string = scourString(in_string, options, stats=stats).encode("UTF-8") output.write(out_string) # Close input and output files (but do not attempt to close stdin/stdout!) @@ -4209,7 +4167,7 @@ def start(options, input, output): oldsize, sizediff), file=options.ensure_value("stdout", sys.stdout)) if options.verbose: - print(getReport(), file=options.ensure_value("stdout", sys.stdout)) + print(generate_report(stats), file=options.ensure_value("stdout", sys.stdout)) def run(): diff --git a/scour/stats.py b/scour/stats.py new file mode 100644 index 0000000..2762b92 --- /dev/null +++ b/scour/stats.py @@ -0,0 +1,28 @@ +class ScourStats(object): + + __slots__ = ( + 'num_elements_removed', + 'num_attributes_removed', + 'num_style_properties_fixed', + 'num_bytes_saved_in_colors', + 'num_ids_removed', + 'num_comments_removed', + 'num_style_properties_fixed', + 'num_rasters_embedded', + 'num_path_segments_removed', + 'num_points_removed_from_polygon', + 'num_bytes_saved_in_path_data', + 'num_bytes_saved_in_colors', + 'num_bytes_saved_in_comments', + 'num_bytes_saved_in_ids', + 'num_bytes_saved_in_lengths', + 'num_bytes_saved_in_transforms', + ) + + def __init__(self): + self.reset() + + def reset(self): + # Set all stats to 0 + for attr in self.__slots__: + setattr(self, attr, 0) From 841ad54e7f073ebe00950871928f9b77b4d6357b Mon Sep 17 00:00:00 2001 From: Niels Thykier Date: Tue, 23 Feb 2021 16:19:52 +0000 Subject: [PATCH 5/9] Refactor function to avoid double negative Signed-off-by: Niels Thykier --- scour/scour.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/scour/scour.py b/scour/scour.py index 6a69d61..9e45a2d 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -1016,13 +1016,18 @@ def remove_descriptive_elements(doc, options): return len(elementsToRemove) -def g_tag_is_unmergeable(node): +def g_tag_is_mergeable(node): """Check if a tag can be merged or not tags with a title or descriptions should generally be left alone. """ - return any(True for n in node.childNodes if n.nodeType == Node.ELEMENT_NODE - and n.nodeName in ('title', 'desc') and n.namespaceURI == NS['SVG']) + if any( + True for n in node.childNodes + if n.nodeType == Node.ELEMENT_NODE and n.nodeName in ('title', 'desc') + and n.namespaceURI == NS['SVG'] + ): + return False + return True def remove_nested_groups(node, stats): @@ -1040,7 +1045,7 @@ def remove_nested_groups(node, stats): for child in node.childNodes: if child.nodeName == 'g' and child.namespaceURI == NS['SVG'] and len(child.attributes) == 0: # only collapse group if it does not have a title or desc as a direct descendant, - if not g_tag_is_unmergeable(child): + if g_tag_is_mergeable(child): groupsToRemove.append(child) for g in groupsToRemove: @@ -1168,7 +1173,7 @@ def mergeSiblingGroupsWithCommonAttributes(elem): if nextNode.nodeName != 'g' or nextNode.namespaceURI != NS['SVG']: break nextAttributes = {a.nodeName: a.nodeValue for a in nextNode.attributes.values()} - if attributes != nextAttributes or g_tag_is_unmergeable(nextNode): + if attributes != nextAttributes or not g_tag_is_mergeable(nextNode): break else: runElements += 1 From fbf0c06e845b585f6ee3bfe5a7470772ab7d86d3 Mon Sep 17 00:00:00 2001 From: Niels Thykier Date: Tue, 23 Feb 2021 16:53:21 +0000 Subject: [PATCH 6/9] Avoid mutating a mutable kwarg Signed-off-by: Niels Thykier --- scour/scour.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scour/scour.py b/scour/scour.py index 9e45a2d..91326c6 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -2109,7 +2109,7 @@ def removeDefaultAttributeValue(node, attribute): """ Removes the DefaultAttribute 'attribute' from 'node' if specified conditions are fulfilled - Warning: Does NOT check if the attribute is actually valid for the passed element type for increased preformance! + Warning: Does NOT check if the attribute is actually valid for the passed element type for increased performance! """ if not node.hasAttribute(attribute.name): return 0 @@ -2134,7 +2134,7 @@ def removeDefaultAttributeValue(node, attribute): return 0 -def removeDefaultAttributeValues(node, options, tainted=set()): +def removeDefaultAttributeValues(node, options, tainted=None): u"""'tainted' keeps a set of attributes defined in parent nodes. For such attributes, we don't delete attributes with default values.""" @@ -2142,6 +2142,9 @@ def removeDefaultAttributeValues(node, options, tainted=set()): if node.nodeType != Node.ELEMENT_NODE: return 0 + if tainted is None: + tainted = set() + # Conditionally remove all default attributes defined in 'default_attributes' (a list of 'DefaultAttribute's) # # For increased performance do not iterate the whole list for each element but run only on valid subsets From 897e3f565c13fb27b1c19ece860269f3ba70e9fd Mon Sep 17 00:00:00 2001 From: Wolfgang Bangerth Date: Thu, 22 Apr 2021 10:59:44 -0600 Subject: [PATCH 7/9] Minor language edits in README.md. --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b8fecb0..660ee5c 100644 --- a/README.md +++ b/README.md @@ -7,11 +7,11 @@ --- -Scour is an SVG optimizer/cleaner that reduces the size of scalable vector graphics by optimizing structure and removing unnecessary data written in Python. +Scour is an SVG optimizer/cleaner written in Python that reduces the size of scalable vector graphics by optimizing structure and removing unnecessary data. It can be used to create streamlined vector graphics suitable for web deployment, publishing/sharing or further processing. -The goal of Scour is to output a file that renderes identically at a fraction of the size by removing a lot of redundant information created by most SVG editors. Optimization options are typically lossless but can be tweaked for more agressive cleaning. +The goal of Scour is to output a file that renders identically at a fraction of the size by removing a lot of redundant information created by most SVG editors. Optimization options are typically lossless but can be tweaked for more agressive cleaning. Scour is open-source and licensed under [Apache License 2.0](https://github.com/codedread/scour/blob/master/LICENSE). From 85f4b49d59073025cbb3d7ca8a7ba175d18c047c Mon Sep 17 00:00:00 2001 From: "David H. Gutteridge" Date: Sun, 18 Jul 2021 13:56:15 -0400 Subject: [PATCH 8/9] Fix a typo in README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 660ee5c..ace6711 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Scour is an SVG optimizer/cleaner written in Python that reduces the size of sca It can be used to create streamlined vector graphics suitable for web deployment, publishing/sharing or further processing. -The goal of Scour is to output a file that renders identically at a fraction of the size by removing a lot of redundant information created by most SVG editors. Optimization options are typically lossless but can be tweaked for more agressive cleaning. +The goal of Scour is to output a file that renders identically at a fraction of the size by removing a lot of redundant information created by most SVG editors. Optimization options are typically lossless but can be tweaked for more aggressive cleaning. Scour is open-source and licensed under [Apache License 2.0](https://github.com/codedread/scour/blob/master/LICENSE). From 0609c596766ec98e4e2092b49bd03b802702ba1a Mon Sep 17 00:00:00 2001 From: a1346054 <36859588+a1346054@users.noreply.github.com> Date: Mon, 30 Aug 2021 17:17:00 +0000 Subject: [PATCH 9/9] Fix spelling (#284) --- CONTRIBUTING.md | 4 ++-- HISTORY.md | 12 ++++++------ scour/scour.py | 6 +++--- setup.py | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b34dbf1..96cb109 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -4,7 +4,7 @@ Contributions to Scour are welcome, feel free to create a pull request! In order to be able to merge your PR as fast as possible please try to stick to the following guidelines. -> _**TL;DR** (if you now what you're doing) – Always run [`make check`](https://github.com/scour-project/scour/blob/master/Makefile) before creating a PR to check for common problems._ +> _**TL;DR** (if you now what you're doing) – Always run [`make check`](https://github.com/scour-project/scour/blob/master/Makefile) before creating a PR to check for common problems._ ## Code Style @@ -32,4 +32,4 @@ To ensure that all possible code conditions are covered by a test you can use [` make coverage ``` -These reports are also created automatically by our TravisCI builds and are accessible via [Codecov](https://codecov.io/gh/scour-project/scour) \ No newline at end of file +These reports are also created automatically by our TravisCI builds and are accessible via [Codecov](https://codecov.io/gh/scour-project/scour) diff --git a/HISTORY.md b/HISTORY.md index 1661d5a..de0b503 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -47,7 +47,7 @@ ## Version 0.35 (2016-09-14) * Drop official support for Python 2.6. (While it will probably continue to work for a while compatibility is not guaranteed anymore. If you continue to use Scour with Python 2.6 and should find/fix any compatibility issues pull requests are welcome, though.) -* Fix: Unused IDs were not shortended when `--shorten-ids` was used. ([#19](https://github.com/scour-project/scour/issues/62)) +* Fix: Unused IDs were not shortened when `--shorten-ids` was used. ([#19](https://github.com/scour-project/scour/issues/62)) * Fix: Most elements were still removed from `` when `--keep-unreferenced-defs` was used. ([#62](https://github.com/scour-project/scour/issues/62)) * Improve escaping of single/double quotes ('/") in attributes. ([#64](https://github.com/scour-project/scour/issues/64)) * Print usage information if no input file was specified (and no data is available from `stdin`). ([#65](https://github.com/scour-project/scour/issues/65)) @@ -56,13 +56,13 @@ * Improve code to remove default attribute values and add a lot of new default values. ([#70](https://github.com/scour-project/scour/issues/70)) * Fix: Only attempt to group elements that the content model allows to be children of a `` when `--create-groups` is specified. ([#98](https://github.com/scour-project/scour/issues/98)) * Fix: Update list of SVG presentation attributes allowing more styles to be converted to attributes and remove two entries (`line-height` and `visibility`) that were actually invalid. ([#99](https://github.com/scour-project/scour/issues/99)) -* Add three options that work analoguous to `--remove-metadata` (removes `` elements) ([#102](https://github.com/scour-project/scour/issues/102)) +* Add three options that work analogous to `--remove-metadata` (removes `` elements) ([#102](https://github.com/scour-project/scour/issues/102)) * `--remove-titles` (removes `` elements) * `--remove-descriptions` (removes `` elements) * `--remove-descriptive-elements` (removes all of the descriptive elements, i.e. ``, `<desc>` and `<metadata>`) * Fix removal rules for the `overflow` attribute. ([#104](https://github.com/scour-project/scour/issues/104)) * Improvement: Automatically order all attributes ([#105](https://github.com/scour-project/scour/issues/105)), as well as `style` declarations ([#107](https://github.com/scour-project/scour/issues/107)) allowing for a constant output across multiple runs of Scour. Before order could change arbitrarily. -* Improve path scouring. ([#108](https://github.com/scour-project/scour/issues/108))<br>Notably Scour performs all caculations with enhanced precision now, guaranteeing maximum accuracy when optimizing path data. Numerical precision is reduced as a last step of the optimization according to the `--precision` option. +* Improve path scouring. ([#108](https://github.com/scour-project/scour/issues/108))<br>Notably Scour performs all calculations with enhanced precision now, guaranteeing maximum accuracy when optimizing path data. Numerical precision is reduced as a last step of the optimization according to the `--precision` option. * Fix replacement of removed duplicate gradients if the `fill`/`stroke` properties contained a fallback. ([#109](https://github.com/scour-project/scour/issues/109)) * Fix conversion of cubic BĂ©zier "curveto" commands into "shorthand/smooth curveto" commands. ([#110](https://github.com/scour-project/scour/issues/110)) * Fix some issues due to removal of properties without considering inheritance rules. ([#111](https://github.com/scour-project/scour/issues/111)) @@ -74,7 +74,7 @@ * Input/output file can now be specified as positional arguments (e.g. `scour input.svg output.svg`). ([#46](https://github.com/scour-project/scour/issues/46)) * Improve `--help` output by intuitively arranging options in groups. ([#46](https://github.com/scour-project/scour/issues/46)) * Add option `--error-on-flowtext` to raise an exception whenever a non-standard `<flowText>` element is found (which is only supported in Inkscape). If this option is not specified a warning will be shown. ([#53](https://github.com/scour-project/scour/issues/53)) -* Automate tests with continouous integration via Travis. ([#52](https://github.com/scour-project/scour/issues/52)) +* Automate tests with continuous integration via Travis. ([#52](https://github.com/scour-project/scour/issues/52)) ## Version 0.33 (2016-01-29) @@ -90,7 +90,7 @@ * Fix a potential regex matching issue in `points` attribute of `<polygon>` and `<polyline>` elements. ([#24](https://github.com/scour-project/scour/issues/24)) * Fix a crash with `points` attribute of `<polygon>` and `<polyline>` starting with a negative number. ([#24](https://github.com/scour-project/scour/issues/24)) * Fix encoding issues when input file contained unicode characters. ([#27](https://github.com/scour-project/scour/issues/27)) -* Fix encoding issues when using `stding`/`stdout` as input/output. ([#27](https://github.com/scour-project/scour/issues/27)) +* Fix encoding issues when using `stding`/`stdout` as input/output. ([#27](https://github.com/scour-project/scour/issues/27)) * Fix removal of comments. If a node contained multiple comments usually not all of them were removed. ([#28](https://github.com/scour-project/scour/issues/28)) @@ -104,7 +104,7 @@ ## Version 0.30 (2014-08-05) -* Fix ingoring of additional args when invoked from scons. +* Fix ignoring of additional args when invoked from scons. ## Version 0.29 (2014-07-26) diff --git a/scour/scour.py b/scour/scour.py index 91326c6..9d19906 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -1484,7 +1484,7 @@ def collapse_singly_referenced_gradients(doc, stats): if target_href: # If the elem node had an xlink:href, then the # refElem have to point to it as well to - # perserve the semantics of the image. + # preserve the semantics of the image. refElem.setAttributeNS(NS['XLINK'], 'href', target_href) else: # The elem node had no xlink:href reference, @@ -1883,7 +1883,7 @@ def styleInheritedByChild(node, style, nodeIsChild=False): 'missing-glyph', 'pattern', 'svg', 'switch', 'symbol']: return False - # in all other cases we have to assume the inherited value of 'style' is meaningfull and has to be kept + # in all other cases we have to assume the inherited value of 'style' is meaningful and has to be kept # (e.g nodes without children at the end of the DOM tree, text nodes, ...) return True @@ -2925,7 +2925,7 @@ def scourCoordinates(data, options, force_whitespace=False, control_points=[], f # - this number starts with a dot but the previous number had *no* dot or exponent # i.e. '1.3 0.5' -> '1.3.5' or '1e3 0.5' -> '1e3.5' is fine but '123 0.5' -> '123.5' is obviously not # - 'force_whitespace' is explicitly set to 'True' - # we never need a space after flags (occuring in elliptical arcs), but librsvg struggles without it + # we never need a space after flags (occurring in elliptical arcs), but librsvg struggles without it if (c > 0 and (force_whitespace or scouredCoord[0].isdigit() diff --git a/setup.py b/setup.py index cf3ed08..990b596 100644 --- a/setup.py +++ b/setup.py @@ -28,10 +28,10 @@ vector graphics by optimizing structure and removing unnecessary data. It can be used to create streamlined vector graphics suitable for web deployment, publishing/sharing or further processing. -The goal of Scour is to output a file that renderes identically at a +The goal of Scour is to output a file that renders identically at a fraction of the size by removing a lot of redundant information created by most SVG editors. Optimization options are typically lossless but can -be tweaked for more agressive cleaning. +be tweaked for more aggressive cleaning. Website - http://www.codedread.com/scour/ (original website)