From a2c94c96fb75a8b62b99b2af81d8f5d291fd2bb3 Mon Sep 17 00:00:00 2001 From: Niels Thykier Date: Sun, 11 Mar 2018 07:18:27 +0000 Subject: [PATCH 1/2] Disable the "m0 0"-optimization as it is wrong in some cases The "m0 0" rewrite gets some cases wrong, like: m150 240h200m0 0 150 150v-300z Scour rewrote that into the following m150 240h200l150 150v-300z However, these two paths do not produce an identical figure at all. The first is a line followed by a triangle while the second is a quadrilateral. While there are some instances we can rewrite (that scour will no longer rewrite), these will require an analysis over multiple commands to determine whether the rewrite is safe. This will reappear in the next commit. Closes: #163 Signed-off-by: Niels Thykier --- scour/scour.py | 17 +++++++++-------- testscour.py | 5 +++-- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/scour/scour.py b/scour/scour.py index 1e990c4..806747e 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -2190,14 +2190,15 @@ def cleanPath(element, options): i = 0 if cmd in ['m', 'l', 't']: if cmd == 'm': - # remove m0,0 segments - if pathIndex > 0 and data[0] == data[i + 1] == 0: - # 'm0,0 x,y' can be replaces with 'lx,y', - # except the first m which is a required absolute moveto - path[pathIndex] = ('l', data[2:]) - _num_path_segments_removed += 1 - else: # else skip move coordinate - i = 2 + # It might be tempting to rewrite "m0 0 ..." into + # "l..." here. However, this is an unsound + # optimization in general as "m0 0 ... z" is + # different from "l...z". + # + # To do such a rewrite, we need to understand the + # full subpath, so for now just leave the first + # two coordinates of "m" alone. + i = 2 while i < len(data): if data[i] == data[i + 1] == 0: del data[i:i + 2] diff --git a/testscour.py b/testscour.py index 060b095..b6abfd5 100755 --- a/testscour.py +++ b/testscour.py @@ -2058,8 +2058,9 @@ class PathEmptyMove(unittest.TestCase): def runTest(self): doc = scourXmlFile('unittests/path-empty-move.svg') - self.assertEqual(doc.getElementsByTagName('path')[0].getAttribute('d'), 'm100 100 200 100z') - self.assertEqual(doc.getElementsByTagName('path')[1].getAttribute('d'), 'm100 100v200l100 100z') + # This path can actually be optimized to avoid the "m0 0z". + self.assertEqual(doc.getElementsByTagName('path')[0].getAttribute('d'), 'm100 100 200 100m0 0z') + self.assertEqual(doc.getElementsByTagName('path')[1].getAttribute('d'), 'm100 100v200m0 0 100 100z') class DefaultsRemovalToplevel(unittest.TestCase): From 38274f75bc90f998b4a0448481d2dd8800c21d7a Mon Sep 17 00:00:00 2001 From: Niels Thykier Date: Sun, 11 Mar 2018 08:22:27 +0000 Subject: [PATCH 2/2] Implement a basic rewrite of redundant commands This basic implementation can drop and rewrite some cases of "m0 0" and "z" without triggering the issues experienced in #163. It works by analysing the path backwards and tracking "z" and "m" commands. Signed-off-by: Niels Thykier --- scour/scour.py | 46 +++++++++++++++++++++++++++-- testscour.py | 22 ++++++++++---- unittests/path-command-rewrites.svg | 8 +++++ unittests/path-empty-move.svg | 5 ---- 4 files changed, 68 insertions(+), 13 deletions(-) create mode 100644 unittests/path-command-rewrites.svg delete mode 100644 unittests/path-empty-move.svg diff --git a/scour/scour.py b/scour/scour.py index 806747e..dcef9ce 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -2181,10 +2181,11 @@ def cleanPath(element, options): x, y = startx, starty path[pathIndex] = ('z', data) - # remove empty segments + # remove empty segments and redundant commands # Reuse the data structure 'path' and the coordinate lists, even if we're # deleting items, because these deletions are relatively cheap. if not has_round_or_square_linecaps: + # remove empty path segments for pathIndex in range(len(path)): cmd, data = path[pathIndex] i = 0 @@ -2196,8 +2197,8 @@ def cleanPath(element, options): # different from "l...z". # # To do such a rewrite, we need to understand the - # full subpath, so for now just leave the first - # two coordinates of "m" alone. + # full subpath. This logic happens after this + # loop. i = 2 while i < len(data): if data[i] == data[i + 1] == 0: @@ -2231,6 +2232,45 @@ def cleanPath(element, options): path[pathIndex] = (cmd, [coord for coord in data if coord != 0]) _num_path_segments_removed += len(path[pathIndex][1]) - oldLen + # remove no-op commands + pathIndex = len(path) + subpath_needs_anchor = False + # NB: We can never rewrite the first m/M command (expect if it + # is the only command) + while pathIndex > 1: + pathIndex -= 1 + cmd, data = path[pathIndex] + if cmd == 'z': + next_cmd, next_data = path[pathIndex - 1] + if next_cmd == 'm' and len(next_data) == 2: + # mX Yz -> mX Y + + # note the len check on next_data as it is not + # safe to rewrite "m0 0 1 1z" in general (it is a + # question of where the "pen" ends - you can + # continue a draw on the same subpath after a + # "z"). + del path[pathIndex] + _num_path_segments_removed += 1 + else: + # it is not safe to rewrite "m0 0 ..." to "l..." + # because of this "z" command. + subpath_needs_anchor = True + elif cmd == 'm': + if len(path) - 1 == pathIndex and len(data) == 2: + # Ends with an empty move (but no line/draw + # following it) + del path[pathIndex] + _num_path_segments_removed += 1 + continue + if subpath_needs_anchor: + subpath_needs_anchor = False + elif data[0] == data[1] == 0: + # unanchored, i.e. we can replace "m0 0 ..." with + # "l..." as there is no "z" after it. + path[pathIndex] = ('l', data[2:]) + _num_path_segments_removed += 1 + # fixup: Delete subcommands having no coordinates. path = [elem for elem in path if len(elem[1]) > 0 or elem[0] == 'z'] diff --git a/testscour.py b/testscour.py index b6abfd5..1e0caa7 100755 --- a/testscour.py +++ b/testscour.py @@ -2054,13 +2054,25 @@ class StyleToAttr(unittest.TestCase): self.assertEqual(line.getAttribute('marker-end'), 'url(#m)') -class PathEmptyMove(unittest.TestCase): +class PathCommandRewrites(unittest.TestCase): def runTest(self): - doc = scourXmlFile('unittests/path-empty-move.svg') - # This path can actually be optimized to avoid the "m0 0z". - self.assertEqual(doc.getElementsByTagName('path')[0].getAttribute('d'), 'm100 100 200 100m0 0z') - self.assertEqual(doc.getElementsByTagName('path')[1].getAttribute('d'), 'm100 100v200m0 0 100 100z') + doc = scourXmlFile('unittests/path-command-rewrites.svg') + paths = doc.getElementsByTagName('path') + expected_paths = [ + ('m100 100 200 100', "Trailing m0 0z not removed"), + ('m100 100v200m0 0 100 100z', "Mangled m0 0 100 100"), + ("m100 100v200m0 0 2-1-2 1z", "Should have removed empty m0 0"), + ("m100 100v200l3-5-5 3m0 0 2-1-2 1z", "Rewrite m0 0 3-5-5 3 ... -> l3-5-5 3 ..."), + ("m100 100v200m0 0 3-5-5 3zm0 0 2-1-2 1z", "No rewrite of m0 0 3-5-5 3z"), + ] + self.assertEqual(len(paths), len(expected_paths), "len(actual_paths) != len(expected_paths)") + for i in range(len(paths)): + actual_path = paths[i].getAttribute('d') + expected_path, message = expected_paths[i] + self.assertEqual(actual_path, + expected_path, + '%s: "%s" != "%s"' % (message, actual_path, expected_path)) class DefaultsRemovalToplevel(unittest.TestCase): diff --git a/unittests/path-command-rewrites.svg b/unittests/path-command-rewrites.svg new file mode 100644 index 0000000..47ddc61 --- /dev/null +++ b/unittests/path-command-rewrites.svg @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/unittests/path-empty-move.svg b/unittests/path-empty-move.svg deleted file mode 100644 index d3b63d7..0000000 --- a/unittests/path-empty-move.svg +++ /dev/null @@ -1,5 +0,0 @@ - - - - -