From c698522c28b5bf61ad4bd0c1e42452255e59b5ef Mon Sep 17 00:00:00 2001 From: Eduard Braun Date: Thu, 10 Dec 2015 22:50:31 +0100 Subject: [PATCH] Simplify and fix "removeComments()" * The separate treatment of comments at the documentElement's level is not necessary - they have a parent (as tested in Python 3.5.0, 2.7.11 and 2.6.6)! It might not have worked before due to a typo (note the "if isinstance(element,...)" and "len(element.data)", which should both refer to "subelement" instead) or a bug in very old versions of Python. * Fix the iteration over childNodes (i.e. replace "for subelement in element.childNodes:" with "for subelement in element.childNodes[:]:"). We have to create a copy of the list to iterate over, otherwise we'd be iterating over a list as we change it, which leads to unpredictable results. Fixes #25 --- scour/scour.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/scour/scour.py b/scour/scour.py index 335bd0e..d4a4628 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -2587,20 +2587,11 @@ def removeComments(element) : """ global numCommentBytes - if isinstance(element, xml.dom.minidom.Document): - # must process the document object separately, because its - # documentElement's nodes have None as their parentNode - for subelement in element.childNodes: - if isinstance(element, xml.dom.minidom.Comment): - numCommentBytes += len(element.data) - element.documentElement.removeChild(subelement) - else: - removeComments(subelement) - elif isinstance(element, xml.dom.minidom.Comment): + if isinstance(element, xml.dom.minidom.Comment): numCommentBytes += len(element.data) element.parentNode.removeChild(element) else: - for subelement in element.childNodes: + for subelement in element.childNodes[:]: removeComments(subelement)