stablerange: use the jump information for faster iteration
authorPierre-Yves David <pierre-yves.david@octobus.net>
Wed, 20 Dec 2017 15:51:05 +0100
changeset 3324 6ba8eaffe8f6
parent 3323 4a84947010a1
child 3325 83d2a2f3dc8f
stablerange: use the jump information for faster iteration Instead of doing a full iteration over the exclusive set, we compare the jump information instead. This perform the same kind of logic than before but will allow for much faster iteration once all the caches are in place.
hgext3rd/evolve/stablerange.py
--- a/hgext3rd/evolve/stablerange.py	Sun Dec 10 02:46:05 2017 +0100
+++ b/hgext3rd/evolve/stablerange.py	Wed Dec 20 15:51:05 2017 +0100
@@ -8,6 +8,7 @@
 # GNU General Public License version 2 or any later version.
 
 import abc
+import functools
 import heapq
 import math
 import os
@@ -446,33 +447,114 @@
         """Compute subrange unique to the exclusive part of merge"""
         result = []
         depth = repo.depthcache.get
+        walkfrom = functools.partial(self._sortcache.walkfrom, repo)
+        getjumps = functools.partial(self._sortcache.getjumps, repo)
         skips = depth(headrev) - slicepoint
+        tomap = slicepoint - initial_index
+
+        jumps = getjumps(headrev)
+        # this function is only caled if headrev is a merge
+        # and initial_index is above its lower parents
+        assert jumps is not None
+        jumps = iter(jumps)
+        assert 0 < skips, skips
+        assert 0 < tomap, (tomap, (headrev, initial_index), slicepoint)
+
+        # utility function know the size of segment
+        # (this value could be cached)
+        def until(start, stop):
+            revs = walkfrom(start)
+            count = 0
+            for count, r in enumerate(revs, 1):
+                if r == stop:
+                    break
+            assert 0 < count, (start, stop)
+            return count
+
+        # utility function to find the next changeset with jump information
+        # (and the distance to it)
+        def nextmerge(startrev):
+            for idx, rev in enumerate(walkfrom(startrev)):
+                if getjumps(rev) is not None:
+                    return idx, rev
+            idx += 1
+            return idx, None
 
-        def nextrevs():
-            revs = self._sortcache.walkfrom(repo, headrev)
-            towalk = depth(headrev) - initial_index
-            while 0 < towalk:
-                yield next(revs)
-                towalk -= 1
-            yield None
-        revs = nextrevs()
-        for i in xrange(skips):
-            next(revs)
-        rangehead = current = next(revs)
-        rangepath = self._sortcache.walkfrom(repo, current)
-        nextonpath = next(rangepath, None)
-        steps = 0
-        while current is not None:
-            while current == nextonpath and current is not None:
-                steps += 1
-                current = next(revs)
-                nextonpath = next(rangepath, None)
-            result.append((rangehead, depth(rangehead) - steps))
-            if current is not None:
-                rangehead = current
-                rangepath = self._sortcache.walkfrom(repo, current)
-                nextonpath = next(rangepath, None)
-                steps = 0
+        # skip over all necesary data
+        mainjump = None
+        jumpdest = headrev
+        while 0 < skips:
+            jumphead = jumpdest
+            currentjump = next(jumps)
+            jumppoint = currentjump[0]
+            jumpdest = currentjump[1]
+            skipped = size = until(jumphead, jumppoint)
+            if size == skips:
+                mainjump = next(jumps)
+                jumphead = jumpdest
+            elif skips < size:
+                revs = walkfrom(jumphead)
+                next(revs)
+                for i in xrange(skips):
+                    jumphead = next(revs)
+                    assert jumphead is not None
+                skipped = skips
+                size -= skips
+                mainjump = currentjump
+            skips -= skipped
+        assert skips == 0, skips
+
+        # exiting from the previous block we should have:
+        # jumphead: first non-skipped revision (head of the high subrange)
+        # mainjump: next jump coming jump on main iteration
+
+        # Now we need to compare walk on the main iteration with walk from the
+        # current subrange head. Instead of doing a full walk, we just skim
+        # over the jumps for each iteration.
+        rangehead = jumphead
+        refjumps = None
+        size = 0
+        while size < tomap:
+            assert mainjump is not None
+            if refjumps is None:
+                dist2merge, merge = nextmerge(jumphead)
+                dist2jump = until(jumphead, mainjump[0])
+                if (dist2jump <= dist2merge) or merge is None:
+                    refjumps = iter(())
+                    ref = None
+                else:
+                    # advance counters
+                    size += dist2merge
+                    jumphead = merge
+                    refjumps = iter(getjumps(merge))
+                    ref = next(refjumps, None)
+            elif mainjump == ref:
+                # both follow the same path
+                size += until(jumphead, ref[0])
+                jumphead = mainjump[1]
+                mainjump = next(jumps, None)
+                ref = next(refjumps, None)
+                if ref is None:
+                    # we are doing with section specific to the last merge
+                    # reset `refjumps` to trigger the logic that search for the
+                    # next merge
+                    refjumps = None
+            else:
+                size += until(jumphead, mainjump[0])
+                if size < tomap:
+                    subrange = (rangehead, depth(rangehead) - size)
+                    assert subrange[1] < depth(subrange[0])
+                    result.append(subrange)
+                    tomap -= size
+                    size = 0
+                    jumphead = rangehead = mainjump[1]
+                    mainjump = next(jumps, None)
+                    refjumps = None
+
+        if tomap:
+            subrange = (rangehead, depth(rangehead) - tomap)
+            assert subrange[1] < depth(subrange[0]), (rangehead, depth(rangehead), tomap)
+            result.append(subrange)
         result.reverse()
         return result