From 91503c6d7e0882ebfff7bb52def7938a63e9864e Mon Sep 17 00:00:00 2001
From: Niels Thykier <niels@thykier.net>
Date: Sun, 15 Apr 2018 17:04:38 +0000
Subject: [PATCH] renameID: Replace referencedIDs with referringNodes

This change pushes the responsibility of updating referencedIDs to its
callers where needed.  The only caller of renameIDs is shortenIDs and
that works perfectly fine without updating its copy of referencedIDs.
In shortenIDs, we need to be able to lookup which nodes referenced the
"original ID" (and not the "new ID").

While shortenIDs *could* update referencedIDs so it remained valid, it
is extra complexity for no gain.  As an example of this complexity,
imagine if two or more IDs are "rotated" like so:

      Original IDs: a, bb, ccc, dddd
      Mapping:
        dddd -> ccc
        ccc  -> bb
        bb   -> a
        a    -> dddd

While doable within reasonable performance, we do not need to support
it at the moment, so there is no reason to handle that complexity.

Signed-off-by: Niels Thykier <niels@thykier.net>
---
 scour/scour.py | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/scour/scour.py b/scour/scour.py
index 95e24b5..d74d3d2 100644
--- a/scour/scour.py
+++ b/scour/scour.py
@@ -700,6 +700,10 @@ def shortenIDs(doc, prefix, options):
     num = 0
 
     identifiedElements = findElementsWithId(doc.documentElement)
+    # This map contains maps the (original) ID to the nodes referencing it.
+    # At the end of this function, it will no longer be valid and while we
+    # could keep it up to date, it will complicate the code for no gain
+    # (as we do not reuse the data structure beyond this function).
     referencedIDs = findReferencedElements(doc.documentElement)
 
     # Make idList (list of idnames) sorted by reference count
@@ -734,7 +738,7 @@ def shortenIDs(doc, prefix, options):
         # (e.g. remapping "c" to "a" is not going to win us anything)
         if len(curId) != len(rid):
             # Then go rename it.
-            num += renameID(doc, rid, curId, identifiedElements, referencedIDs)
+            num += renameID(doc, rid, curId, identifiedElements, referencedIDs.get(rid))
         elif curId < rid:
             # If we skip reassigning an ID because it has the same length
             # (E.g. skipping "c" -> "a"), then we have to mark the "future"
@@ -765,14 +769,12 @@ def intToID(idnum, prefix):
     return prefix + rid
 
 
-def renameID(doc, idFrom, idTo, identifiedElements, referencedIDs):
+def renameID(doc, idFrom, idTo, identifiedElements, referringNodes):
     """
     Changes the ID name from idFrom to idTo, on the declaring element
-    as well as all references in the document doc.
+    as well as all nodes in referringNodes.
 
-    Updates identifiedElements, but not referencedIDs.
-    Does not handle the case where idTo is already the ID name
-    of another element in doc.
+    Updates identifiedElements.
 
     Returns the number of bytes saved by this replacement.
     """
@@ -786,7 +788,6 @@ def renameID(doc, idFrom, idTo, identifiedElements, referencedIDs):
     num += len(idFrom) - len(idTo)
 
     # Update references to renamed node
-    referringNodes = referencedIDs.get(idFrom)
     if referringNodes is not None:
 
         # Look for the idFrom ID name in each of the referencing elements,
@@ -837,11 +838,6 @@ def renameID(doc, idFrom, idTo, identifiedElements, referencedIDs):
                     node.setAttribute(attr, newValue)
                     num += len(oldValue) - len(newValue)
 
-        # We deliberately leave referencedIDs alone to enable us to bulk update
-        # IDs where two nodes swap IDs.  (GH#186)
-        # del referencedIDs[idFrom]
-        # referencedIDs[idTo] = referringNodes
-
     return num