Serialization: Avoid creating a single-use dict in each call to make_well_formed

Signed-off-by: Niels Thykier <niels@thykier.net>
This commit is contained in:
Niels Thykier 2020-05-19 21:36:58 +00:00
parent 21f1262bcb
commit 5be6b03d7c
No known key found for this signature in database
GPG key ID: A65B78DBE67C7AAC
2 changed files with 34 additions and 27 deletions

View file

@@ -74,6 +74,12 @@ VER = __version__
COPYRIGHT = u'Copyright Jeff Schiller, Louis Simard, 2010' COPYRIGHT = u'Copyright Jeff Schiller, Louis Simard, 2010'
XML_ENTS_NO_QUOTES = {'<': '&lt;', '>': '&gt;', '&': '&amp;'}
XML_ENTS_ESCAPE_APOS = XML_ENTS_NO_QUOTES.copy()
XML_ENTS_ESCAPE_APOS["'"] = '&apos;'
XML_ENTS_ESCAPE_QUOT = XML_ENTS_NO_QUOTES.copy()
XML_ENTS_ESCAPE_QUOT['"'] = '&quot;'
NS = {'SVG': 'http://www.w3.org/2000/svg', NS = {'SVG': 'http://www.w3.org/2000/svg',
'XLINK': 'http://www.w3.org/1999/xlink', 'XLINK': 'http://www.w3.org/1999/xlink',
'SODIPODI': 'http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd', 'SODIPODI': 'http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd',
@@ -3404,23 +3410,23 @@ def remapNamespacePrefix(node, oldprefix, newprefix):
remapNamespacePrefix(child, oldprefix, newprefix) remapNamespacePrefix(child, oldprefix, newprefix)
def makeWellFormed(str, quote=''): def make_well_formed(text, quote_dict=None):
xml_ents = {'<': '&lt;', '>': '&gt;', '&': '&amp;'} if quote_dict is None:
if quote: quote_dict = XML_ENTS_NO_QUOTES
xml_ents[quote] = '&apos;' if (quote == "'") else "&quot;" return ''.join(quote_dict[c] if c in quote_dict else c for c in text)
return ''.join(xml_ents[c] if c in xml_ents else c for c in str)
def chooseQuoteCharacter(str): def choose_quote_character(value):
quotCount = str.count('"') quot_count = value.count('"')
aposCount = str.count("'") if quot_count == 0 or quot_count <= value.count("'"):
if quotCount > aposCount: # Fewest "-symbols (if there are 0, we pick this to avoid spending
quote = "'" # time counting the '-symbols as it won't matter)
hasEmbeddedQuote = aposCount
else:
quote = '"' quote = '"'
hasEmbeddedQuote = quotCount xml_ent = XML_ENTS_ESCAPE_QUOT
return (quote, hasEmbeddedQuote) else:
quote = "'"
xml_ent = XML_ENTS_ESCAPE_APOS
return quote, xml_ent
TEXT_CONTENT_ELEMENTS = ['text', 'tspan', 'tref', 'textPath', 'altGlyph', TEXT_CONTENT_ELEMENTS = ['text', 'tspan', 'tref', 'textPath', 'altGlyph',
@@ -3472,8 +3478,8 @@ def serializeXML(element, options, indent_depth=0, preserveWhitespace=False):
attr = attrList.item(index) attr = attrList.item(index)
attrValue = attr.nodeValue attrValue = attr.nodeValue
(quote, hasEmbeddedQuote) = chooseQuoteCharacter(attrValue) quote, xml_ent = choose_quote_character(attrValue)
attrValue = makeWellFormed(attrValue, quote if hasEmbeddedQuote else '') attrValue = make_well_formed(attrValue, xml_ent)
if attr.nodeName == 'style': if attr.nodeName == 'style':
# sort declarations # sort declarations
@@ -3532,7 +3538,7 @@ def serializeXML(element, options, indent_depth=0, preserveWhitespace=False):
text_content = text_content.replace('  ', ' ') text_content = text_content.replace('  ', ' ')
else: else:
text_content = text_content.strip() text_content = text_content.strip()
outParts.append(makeWellFormed(text_content)) outParts.append(make_well_formed(text_content))
# CDATA node # CDATA node
elif child.nodeType == Node.CDATA_SECTION_NODE: elif child.nodeType == Node.CDATA_SECTION_NODE:
outParts.extend(['<![CDATA[', child.nodeValue, ']]>']) outParts.extend(['<![CDATA[', child.nodeValue, ']]>'])

View file

@@ -30,7 +30,8 @@ import unittest
import six import six
from six.moves import map, range from six.moves import map, range
from scour.scour import makeWellFormed, parse_args, scourString, scourXmlFile, start, run from scour.scour import (make_well_formed, parse_args, scourString, scourXmlFile, start, run,
XML_ENTS_ESCAPE_APOS, XML_ENTS_ESCAPE_QUOT)
from scour.svg_regex import svg_parser from scour.svg_regex import svg_parser
from scour import __version__ from scour import __version__
@@ -1893,26 +1894,26 @@ class EnsureLineEndings(unittest.TestCase):
class XmlEntities(unittest.TestCase): class XmlEntities(unittest.TestCase):
def runTest(self): def runTest(self):
self.assertEqual(makeWellFormed('<>&'), '&lt;&gt;&amp;', self.assertEqual(make_well_formed('<>&'), '&lt;&gt;&amp;',
'Incorrectly translated unquoted XML entities') 'Incorrectly translated unquoted XML entities')
self.assertEqual(makeWellFormed('<>&', "'"), '&lt;&gt;&amp;', self.assertEqual(make_well_formed('<>&', XML_ENTS_ESCAPE_APOS), '&lt;&gt;&amp;',
'Incorrectly translated single-quoted XML entities') 'Incorrectly translated single-quoted XML entities')
self.assertEqual(makeWellFormed('<>&', '"'), '&lt;&gt;&amp;', self.assertEqual(make_well_formed('<>&', XML_ENTS_ESCAPE_QUOT), '&lt;&gt;&amp;',
'Incorrectly translated double-quoted XML entities') 'Incorrectly translated double-quoted XML entities')
self.assertEqual(makeWellFormed("'"), "'", self.assertEqual(make_well_formed("'"), "'",
'Incorrectly translated unquoted single quote') 'Incorrectly translated unquoted single quote')
self.assertEqual(makeWellFormed('"'), '"', self.assertEqual(make_well_formed('"'), '"',
'Incorrectly translated unquoted double quote') 'Incorrectly translated unquoted double quote')
self.assertEqual(makeWellFormed("'", '"'), "'", self.assertEqual(make_well_formed("'", XML_ENTS_ESCAPE_QUOT), "'",
'Incorrectly translated double-quoted single quote') 'Incorrectly translated double-quoted single quote')
self.assertEqual(makeWellFormed('"', "'"), '"', self.assertEqual(make_well_formed('"', XML_ENTS_ESCAPE_APOS), '"',
'Incorrectly translated single-quoted double quote') 'Incorrectly translated single-quoted double quote')
self.assertEqual(makeWellFormed("'", "'"), '&apos;', self.assertEqual(make_well_formed("'", XML_ENTS_ESCAPE_APOS), '&apos;',
'Incorrectly translated single-quoted single quote') 'Incorrectly translated single-quoted single quote')
self.assertEqual(makeWellFormed('"', '"'), '&quot;', self.assertEqual(make_well_formed('"', XML_ENTS_ESCAPE_QUOT), '&quot;',
'Incorrectly translated double-quoted double quote') 'Incorrectly translated double-quoted double quote')