diff --git a/scour/scour.py b/scour/scour.py index 8feb15c..a46af61 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -3334,8 +3334,6 @@ def serializeXML(element, options, indent_depth=0, preserveWhitespace=False): children = element.childNodes if children.length == 0: outParts.append('/>') - if indent_depth > 0: - outParts.append(newline) else: outParts.append('>') @@ -3343,34 +3341,47 @@ def serializeXML(element, options, indent_depth=0, preserveWhitespace=False): for child in element.childNodes: # element node if child.nodeType == Node.ELEMENT_NODE: - if preserveWhitespace: + # do not indent inside text content elements as in SVG there's a difference between + # "text1\ntext2" and + # "text1\n text2" + # see https://www.w3.org/TR/SVG/text.html#WhiteSpace + if preserveWhitespace or element.nodeName in ['text', 'tspan', 'tref', 'textPath', 'altGlyph']: outParts.append(serializeXML(child, options, 0, preserveWhitespace)) else: outParts.extend([newline, serializeXML(child, options, indent_depth + 1, preserveWhitespace)]) onNewLine = True # text node elif child.nodeType == Node.TEXT_NODE: - # trim it only in the case of not being a child of an element - # where whitespace might be important - if preserveWhitespace: - outParts.append(makeWellFormed(child.nodeValue)) - else: - outParts.append(makeWellFormed(child.nodeValue.strip())) + text_content = child.nodeValue + if not preserveWhitespace: + # strip / consolidate whitespace according to spec, see + # https://www.w3.org/TR/SVG/text.html#WhiteSpace + if element.nodeName in ['text', 'tspan', 'tref', 'textPath', 'altGlyph']: + text_content = text_content.replace('\n', '') + text_content = text_content.replace('\t', ' ') + if child == element.firstChild: + text_content = text_content.lstrip() + elif child == element.lastChild: + text_content = text_content.rstrip() + while ' ' in text_content: + text_content = text_content.replace(' ', ' ') + else: + text_content = text_content.strip() + outParts.append(makeWellFormed(text_content)) # CDATA node elif child.nodeType == Node.CDATA_SECTION_NODE: outParts.extend(['']) # Comment node elif child.nodeType == Node.COMMENT_NODE: - outParts.extend(['']) + outParts.extend([newline, indent_type * (indent_depth+1), '']) # TODO: entities, processing instructions, what else? else: # ignore the rest pass if onNewLine: + outParts.append(newline) outParts.append(indent_type * indent_depth) outParts.extend(['']) - if indent_depth > 0: - outParts.append(newline) return "".join(outParts) @@ -3632,13 +3643,6 @@ def scourString(in_string, options=None): # out_string = doc.documentElement.toprettyxml(' ') out_string = serializeXML(doc.documentElement, options) + '\n' - # now strip out empty lines - lines = [] - # Get rid of empty lines - for line in out_string.splitlines(True): - if line.strip(): - lines.append(line) - # return the string with its XML prolog and surrounding comments if options.strip_xml_prolog is False: total_output = ' - - This is some messed-up markup - -'''.splitlines() - for i in range(4): - self.assertEqual(s[i], c[i], - 'Whitespace not preserved for line ' + str(i)) + def setUp(self): + self.doc = scourXmlFile('unittests/whitespace.svg') + def test_basic(self): + text = self.doc.getElementById('txt_a1') + self.assertIn('text1 text2', text.toxml(), + 'Multiple spaces not stripped from text element') + text = self.doc.getElementById('txt_a2') + self.assertIn('text1 text2', text.toxml(), + 'Tab not replaced with space in text element') + text = self.doc.getElementById('txt_a3') + self.assertIn('text1 text2', text.toxml(), + 'Multiple spaces not stripped from text element with xml:space="default"') + text = self.doc.getElementById('txt_a4') + self.assertIn('text1 text2', text.toxml(), + 'Tab not replaced with space in text element with xml:space="default"') + text = self.doc.getElementById('txt_a5') + self.assertIn('text1 text2', text.toxml(), + 'Multiple spaces not preserved in text element with xml:space="preserve"') + text = self.doc.getElementById('txt_a6') + self.assertIn('text1\ttext2', text.toxml(), + 'Tab not preserved in text element with xml:space="preserve"') -class DoNotPrettyPrintWhenNestedWhitespacePreserved(unittest.TestCase): + def test_newlines(self): + text = self.doc.getElementById('txt_b1') + self.assertIn('text1 text2', text.toxml(), + 'Newline not replaced with space in text element') + text = self.doc.getElementById('txt_b2') + self.assertIn('text1 text2', text.toxml(), + 'Newline not replaced with space in text element with xml:space="default"') + text = self.doc.getElementById('txt_b3') + self.assertIn('text1\n text2', text.toxml(), + 'Newline not preserved in text element with xml:space="preserve"') - def runTest(self): - with open('unittests/whitespace-nested.svg') as f: - s = scourString(f.read()).splitlines() - c = ''' - - Use bold text - -'''.splitlines() - for i in range(4): - self.assertEqual(s[i], c[i], - 'Whitespace not preserved when nested for line ' + str(i)) + def test_inheritance(self): + text = self.doc.getElementById('txt_c1') + self.assertIn('text1 text2', text.toxml(), + ' does not inherit xml:space="preserve" of parent text element') + text = self.doc.getElementById('txt_c2') + self.assertIn('text1 text2', text.toxml(), + 'xml:space="default" of does not overwrite xml:space="preserve" of parent text element') + text = self.doc.getElementById('txt_c3') + self.assertIn('text1 text2', text.toxml(), + 'xml:space="preserve" of does not overwrite xml:space="default" of parent text element') + text = self.doc.getElementById('txt_c4') + self.assertIn('text1 text2', text.toxml(), + ' does not inherit xml:space="preserve" of parent group') + text = self.doc.getElementById('txt_c5') + self.assertIn('text1 text2', text.toxml(), + 'xml:space="default" of text element does not overwrite xml:space="preserve" of parent group') + text = self.doc.getElementById('txt_c6') + self.assertIn('text1 text2', text.toxml(), + 'xml:space="preserve" of text element does not overwrite xml:space="default" of parent group') + + def test_important_whitespace(self): + text = self.doc.getElementById('txt_d1') + self.assertIn('text1 text2', text.toxml(), + 'Newline with whitespace collapsed in text element') + text = self.doc.getElementById('txt_d2') + self.assertIn('text1 tspan1 text2', text.toxml(), + 'Whitespace stripped from the middle of a text element') + text = self.doc.getElementById('txt_d3') + self.assertIn('text1 tspan1 tspan2 text2', text.toxml(), + 'Whitespace stripped from the middle of a text element') + + def test_incorrect_whitespace(self): + text = self.doc.getElementById('txt_e1') + self.assertIn('text1text2', text.toxml(), + 'Whitespace introduced in text element with newline') + text = self.doc.getElementById('txt_e2') + self.assertIn('text1tspantext2', text.toxml(), + 'Whitespace introduced in text element with ') + text = self.doc.getElementById('txt_e3') + self.assertIn('text1tspantext2', text.toxml(), + 'Whitespace introduced in text element with and newlines') class GetAttrPrefixRight(unittest.TestCase): @@ -1807,10 +1856,10 @@ class HandleEmptyStyleElement(unittest.TestCase): class EnsureLineEndings(unittest.TestCase): def runTest(self): - with open('unittests/whitespace-important.svg') as f: + with open('unittests/newlines.svg') as f: s = scourString(f.read()) - self.assertEqual(len(s.splitlines()), 4, - 'Did not output line ending character correctly') + self.assertEqual(len(s.splitlines()), 24, + 'Did handle reading or outputting line ending characters correctly') class XmlEntities(unittest.TestCase): diff --git a/unittests/newlines.svg b/unittests/newlines.svg new file mode 100644 index 0000000..a909603 --- /dev/null +++ b/unittests/newlines.svg @@ -0,0 +1,50 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/unittests/whitespace-important.svg b/unittests/whitespace-important.svg deleted file mode 100644 index 6918044..0000000 --- a/unittests/whitespace-important.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - This is some messed-up markup - diff --git a/unittests/whitespace-nested.svg b/unittests/whitespace-nested.svg deleted file mode 100644 index 3b99356..0000000 --- a/unittests/whitespace-nested.svg +++ /dev/null @@ -1,4 +0,0 @@ - - - Use bold text - diff --git a/unittests/whitespace.svg b/unittests/whitespace.svg new file mode 100644 index 0000000..2bb48a6 --- /dev/null +++ b/unittests/whitespace.svg @@ -0,0 +1,40 @@ + + + + text1 text2 + text1 text2 + text1 text2 + text1 text2 + text1 text2 + text1 text2 + + + text1 + text2 + text1 + text2 + text1 + text2 + + + text1 text2 + text1 text2 + text1 text2 + text1 text2 + text1 text2 + text1 text2 + + + text1 + text2 + text1 tspan1 text2 + text1 tspan1 tspan2 text2 + + + text1 +text2 + text1tspantext2 + text1 +tspan +text2 +