From 738b7d7c5d41759909c897af465c9ea512c4add6 Mon Sep 17 00:00:00 2001 From: Eduard Braun Date: Tue, 16 Aug 2016 00:07:29 +0200 Subject: [PATCH] Don't escape quotes ('/") in text nodes and attributes. - In text nodes quotes are fine - In attributes quotes are fine if used reciprocally. Escaping in the latter case often causes issues, e.g. with quoted font names (#21) or inline CSS styles (#56), while it probably does not gain anything (if quotes are wrongly used in attribute names the XML is most likely invalid to start with) --- scour/scour.py | 5 ++++- testscour.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/scour/scour.py b/scour/scour.py index 5e9aaf0..430cfa1 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -2752,7 +2752,10 @@ def remapNamespacePrefix(node, oldprefix, newprefix): def makeWellFormed(str): - xml_ents = { '<':'&lt;', '>':'&gt;', '&':'&amp;', "'":'&apos;', '"':'&quot;'} + # Don't escape quotation marks for now as they are fine in text nodes + # as well as in attributes if used reciprocally + # xml_ents = { '<':'&lt;', '>':'&gt;', '&':'&amp;', "'":'&apos;', '"':'&quot;'} + xml_ents = { '<':'&lt;', '>':'&gt;', '&':'&amp;'} # starr = [] # for c in str: diff --git a/testscour.py b/testscour.py index 06eaf49..322b7f9 100755 --- a/testscour.py +++ b/testscour.py @@ -1114,7 +1114,7 @@ class EnsureLineEndings(unittest.TestCase): class XmlEntities(unittest.TestCase): def runTest(self): - self.assertEqual( scour.makeWellFormed('<>&"\''), '&lt;&gt;&amp;&quot;&apos;', + self.assertEqual( scour.makeWellFormed('<>&'), '&lt;&gt;&amp;', 'Incorrectly translated XML entities') class DoNotStripCommentsOutsideOfRoot(unittest.TestCase):