diff --git a/scour/scour.py b/scour/scour.py index ad675e6..95e24b5 100644 --- a/scour/scour.py +++ b/scour/scour.py @@ -715,22 +715,36 @@ def shortenIDs(doc, prefix, options): idList.extend([rid for rid in identifiedElements if rid not in idList]) # Ensure we do not reuse a protected ID by accident protectedIDs = protected_ids(identifiedElements, options) + # IDs that we have used "ahead of time". Happens if we are about to + # rename an ID and there is no length difference between the new and + # the old ID. + consumedIDs = set() curIdNum = 1 for rid in idList: curId = intToID(curIdNum, prefix) - # Skip ahead if the new ID is in use and protected. - while curId in protectedIDs: + # Skip ahead if the new ID has already been used or is protected. + while curId in protectedIDs or curId in consumedIDs: curIdNum += 1 curId = intToID(curIdNum, prefix) - # Now check if we found a new ID (we can happen to choose the same - # ID. More likely on a rerun) - if curId != rid: + # Avoid checking the ID if it will not affect the size of the document + # (e.g. remapping "c" to "a" is not going to win us anything) + if len(curId) != len(rid): # Then go rename it. num += renameID(doc, rid, curId, identifiedElements, referencedIDs) + elif curId < rid: + # If we skip reassigning an ID because it has the same length + # (E.g. skipping "c" -> "a"), then we have to mark the "future" + # ID as taken ("c" in the example). + # The "strictly less than" in the condition is to ensure that we + # properly update curIdNum in the corner case where curId == rid. + consumedIDs.add(rid) + # Use continue here without updating curIdNum to avoid losing + # the current ID. + continue curIdNum += 1 return num diff --git a/testscour.py b/testscour.py index b52d98f..eb62af7 100755 --- a/testscour.py +++ b/testscour.py @@ -1948,6 +1948,19 @@ class ShortenIDsOption(unittest.TestCase): 'Did not update reference to shortened ID') +class ShortenIDsStableOutput(unittest.TestCase): + + def runTest(self): + doc = scourXmlFile('unittests/shorten-ids-stable-output.svg', + parse_args(['--shorten-ids'])) + use_tags = doc.getElementsByTagName('use') + hrefs_ordered = [x.getAttributeNS('http://www.w3.org/1999/xlink', 'href') + for x in use_tags] + expected = ['#a', '#b', '#b'] + self.assertEquals(hrefs_ordered, expected, + '--shorten-ids pointlessly reassigned ids') + + class MustKeepGInSwitch(unittest.TestCase): def runTest(self): diff --git a/unittests/shorten-ids-stable-output.svg b/unittests/shorten-ids-stable-output.svg new file mode 100644 index 0000000..f2df1bc --- /dev/null +++ b/unittests/shorten-ids-stable-output.svg @@ -0,0 +1,11 @@ + + + + + + + + + + +