stablerange: rearrange the code picking subrange to warm
Same logic, as the previous changesets, we prepare the code before adding merge
support.
# Code dedicated to the computation and properties of "stable ranges"
#
# These stable ranges are use for obsolescence markers discovery
#
# Copyright 2017 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import collections
import math
from mercurial import (
commands,
cmdutil,
localrepo,
node as nodemod,
scmutil,
)
from mercurial.i18n import _
from . import (
exthelper,
)
eh = exthelper.exthelper()
##################################
### Stable topological sorting ###
##################################
@eh.command(
'debugstablesort',
[
('', 'rev', [], 'heads to start from'),
] + commands.formatteropts,
_(''))
def debugstablesort(ui, repo, **opts):
"""display the ::REVS set topologically sorted in a stable way
"""
revs = scmutil.revrange(repo, opts['rev'])
displayer = cmdutil.show_changeset(ui, repo, opts, buffered=True)
for r in stablesort(repo, revs):
ctx = repo[r]
displayer.show(ctx)
displayer.flush(ctx)
displayer.close()
def stablesort(repo, revs, mergecallback=None):
"""return '::revs' topologically sorted in "stable" order
This is a depth first traversal starting from 'nullrev', using node as a
tie breaker.
"""
# Various notes:
#
# * Bitbucket is used dates as tie breaker, that might be a good idea.
#
# * It seemds we can traverse in the same order from (one) head to bottom,
# if we the following record data for each merge:
#
# - highest (stablesort-wise) common ancestors,
# - order of parents (tablesort-wise)
cl = repo.changelog
parents = cl.parentrevs
nullrev = nodemod.nullrev
n = cl.node
# step 1: We need a parents -> children mapping for 2 reasons.
#
# * we build the order from nullrev to tip
#
# * we need to detect branching
children = collections.defaultdict(list)
for r in cl.ancestors(revs, inclusive=True):
p1, p2 = parents(r)
children[p1].append(r)
if p2 != nullrev:
children[p2].append(r)
# step two: walk back up
# * pick lowest node in case of branching
# * stack disregarded part of the branching
# * process merge when both parents are yielded
# track what changeset has been
seen = [0] * (max(revs) + 2)
seen[-1] = True # nullrev is known
# starts from repository roots
# reuse the list form the mapping as we won't need it again anyway
stack = children[nullrev]
if not stack:
return []
if 1 < len(stack):
stack.sort(key=n, reverse=True)
# list of rev, maybe we should yield, but since we built a children mapping we are 'O(N)' already
result = []
current = stack.pop()
while current is not None or stack:
if current is None:
# previous iteration reached a merge or an unready merge,
current = stack.pop()
if seen[current]:
current = None
continue
p1, p2 = parents(current)
if not (seen[p1] and seen[p2]):
# we can't iterate on this merge yet because other child is not
# yielded yet (and we are topo sorting) we can discard it for now
# because it will be reached from the other child.
current = None
continue
assert not seen[current]
seen[current] = True
result.append(current) # could be yield, cf earlier comment
if mergecallback is not None and p2 != nullrev:
mergecallback(result, current)
cs = children[current]
if not cs:
current = None
elif 1 == len(cs):
current = cs[0]
else:
cs.sort(key=n, reverse=True)
current = cs.pop() # proceed on smallest
stack.extend(cs) # stack the rest for later
assert len(result) == len(set(result))
return result
#################################
### Stable Range computation ###
#################################
def _hlp2(i):
"""return highest power of two lower than 'i'"""
return 2 ** int(math.log(i - 1, 2))
def subrangesclosure(repo, heads):
"""set of all standard subrange under heads
This is intended for debug purposes. Range are returned from largest to
smallest in terms of number of revision it contains."""
subranges = repo.stablerange.subranges
toproceed = [(r, 0, ) for r in heads]
ranges = set(toproceed)
while toproceed:
entry = toproceed.pop()
for r in subranges(repo, entry):
if r not in ranges:
ranges.add(r)
toproceed.append(r)
ranges = list(ranges)
n = repo.changelog.node
rangelength = repo.stablerange.rangelength
ranges.sort(key=lambda r: (-rangelength(repo, r), n(r[0])))
return ranges
class stablerange(object):
def __init__(self):
self._depthcache = {}
self._subrangescache = {}
# To slices merge, we need to walk their descendant in reverse stable
# sort order. For now we perform a full stable sort their descendant
# and then use the relevant top most part. This order is going to be
# the same for all ranges headed at the same merge. So we cache these
# value to reuse them accross the same invocation.
self._stablesortcache = {}
# something useful to compute the above
# mergerev -> stablesort, length
self._stablesortprepared = {}
# caching parent call # as we do so many of them
self._parentscache = {}
def warmup(self, repo, heads):
"""warm the cache up to 'heads'"""
repo = repo.unfiltered()
for r in repo.revs("::%ld", heads):
self.depthrev(repo, r)
def depthrev(self, repo, rev):
repo = repo.unfiltered()
cl = repo.changelog
cache = self._depthcache
nullrev = nodemod.nullrev
stack = [rev]
while stack:
revdepth = None
current = stack[-1]
revdepth = cache.get(current)
if revdepth is not None:
stack.pop()
continue
p1, p2 = self._parents(current, cl.parentrevs)
if p1 == nullrev:
# root case
revdepth = 1
elif p2 == nullrev:
# linear commit case
parentdepth = cache.get(p1)
if parentdepth is None:
stack.append(p1)
else:
revdepth = parentdepth + 1
else:
# merge case
revdepth = self._depthmerge(cl, current, p1, p2, stack, cache)
if revdepth is not None:
cache[current] = revdepth
stack.pop()
# actual_depth = len(list(cl.ancestors([rev], inclusive=True)))
# assert revdepth == actual_depth, (rev, revdepth, actual_depth)
return revdepth
def rangelength(self, repo, rangeid):
headrev, index = rangeid[0], rangeid[1]
return self.depthrev(repo, headrev) - index
def subranges(self, repo, rangeid):
cached = self._subrangescache.get(rangeid)
if cached is not None:
return cached
value = self._subranges(repo, rangeid)
self._subrangescache[rangeid] = value
return value
def revsfromrange(self, repo, rangeid):
headrev, index = rangeid
rangelength = self.rangelength(repo, rangeid)
if rangelength == 1:
revs = [headrev]
else:
# get all revs under heads in stable order
#
# note: In the general case we can just walk down and then request
# data about the merge. But I'm not sure this function will be even
# call for the general case.
allrevs = self._stablesortcache.get(headrev)
if allrevs is None:
allrevs = self._getrevsfrommerge(repo, headrev)
if allrevs is None:
allrevs = stablesort(repo, [headrev],
mergecallback=self._filestablesortcache)
self._stablesortcache[headrev] = allrevs
# takes from index
revs = allrevs[index:]
# sanity checks
assert len(revs) == rangelength
return revs
def _parents(self, rev, func):
parents = self._parentscache.get(rev)
if parents is None:
parents = func(rev)
self._parentscache[rev] = parents
return parents
def _filestablesortcache(self, sortedrevs, merge):
if merge not in self._stablesortprepared:
self._stablesortprepared[merge] = (sortedrevs, len(sortedrevs))
def _getrevsfrommerge(self, repo, merge):
prepared = self._stablesortprepared.get(merge)
if prepared is None:
return None
mergedepth = self.depthrev(repo, merge)
allrevs = prepared[0][:prepared[1]]
nbextrarevs = prepared[1] - mergedepth
if not nbextrarevs:
return allrevs
anc = repo.changelog.ancestors([merge], inclusive=True)
top = []
counter = nbextrarevs
for rev in reversed(allrevs):
if rev in anc:
top.append(rev)
else:
counter -= 1
if counter <= 0:
break
bottomidx = prepared[1] - (nbextrarevs + len(top))
revs = allrevs[:bottomidx]
revs.extend(reversed(top))
return revs
@staticmethod
def _depthmerge(cl, rev, p1, p2, stack, cache):
# sub method to simplify the main 'depthrev' one
revdepth = None
depth_p1 = cache.get(p1)
depth_p2 = cache.get(p2)
missingparent = False
if depth_p1 is None:
stack.append(p1)
missingparent = True
if depth_p2 is None:
stack.append(p2)
missingparent = True
if missingparent:
return None
# computin depth of a merge
# XXX the common ancestors heads could be cached
ancnodes = cl.commonancestorsheads(cl.node(p1), cl.node(p2))
ancrevs = [cl.rev(a) for a in ancnodes]
anyunkown = False
ancdepth = []
for r in ancrevs:
d = cache.get(r)
if d is None:
anyunkown = True
stack.append(r)
ancdepth.append((r, d))
if anyunkown:
return None
if not ancrevs:
# unrelated branch, (no common root)
revdepth = depth_p1 + depth_p2 + 1
elif len(ancrevs) == 1:
# one unique branch point:
# we can compute depth without any walk
depth_anc = ancdepth[0][1]
revdepth = depth_p1 + (depth_p2 - depth_anc) + 1
else:
# multiple ancestors, we pick one that is
# * the deepest (less changeset outside of it),
# * lowest revs because more chance to have descendant of other "above"
anc, revdepth = max(ancdepth, key=lambda x: (x[1], -x[0]))
revdepth += len(cl.findmissingrevs(common=[anc], heads=[rev]))
return revdepth
def _subranges(self, repo, rangeid):
if self.rangelength(repo, rangeid) == 1:
return []
slicepoint = self._slicepoint(repo, rangeid)
# make sure we have cache for all relevant parent first to prevent
# recursion (python is bad with recursion
stack = []
current = rangeid
while current is not None:
current = self._cold_reusable(repo, current, slicepoint)
if current is not None:
stack.append(current)
while stack:
# these call will directly compute the subranges
self.subranges(repo, stack.pop())
return self._slicesrangeat(repo, rangeid, slicepoint)
def _cold_reusable(self, repo, rangeid, slicepoint):
"""return parent range that it would be useful to prepare to slice
rangeid at slicepoint
This function also have the important task to update the revscache of
the parent revs if possible and needed"""
# is this is a merge, there is not need to prepare the parents.
p1, p2 = self._parents(rangeid[0], repo.changelog.parentrevs)
if p2 != nodemod.nullrev:
return None
reusablerev = p1
# if we reached the slicepoint, no need to go further
if self.depthrev(repo, reusablerev) <= slicepoint:
return None
reurange = (reusablerev, rangeid[1])
# if we have an entry for the current range, lets update the cache
# if we already have subrange for this range, no need to prepare it.
if self._subrangescache.get(reurange) is not None:
return None
# look like we found a relevent parentrange with no cache yet
return reurange
def _slicepoint(self, repo, rangeid):
rangedepth = self.depthrev(repo, rangeid[0])
step = _hlp2(rangedepth)
standard_start = 0
while standard_start < rangeid[1] and 0 < step:
if standard_start + step < rangedepth:
standard_start += step
step //= 2
if rangeid[1] == standard_start:
slicesize = _hlp2(self.rangelength(repo, rangeid))
slicepoint = rangeid[1] + slicesize
else:
assert standard_start < rangedepth
slicepoint = standard_start
return slicepoint
def _slicesrangeat(self, repo, rangeid, globalindex):
p1, p2 = self._parents(rangeid[0], repo.changelog.parentrevs)
if p2 != nodemod.nullrev:
return self._slicesrangeatmerge(repo, rangeid, globalindex)
reuserev = p1
assert reuserev != nodemod.nullrev
reuserange = (reuserev, rangeid[1])
top = (rangeid[0], globalindex)
if rangeid[1] + self.rangelength(repo, reuserange) == globalindex:
return [reuserange, top]
# This will not initiate a recursion since we took appropriate
# precaution in the caller of this method to ensure it will be so.
# It the parent is a merge that will not be the case but computing
# subranges from a merge will not recurse.
reusesubranges = self.subranges(repo, reuserange)
slices = reusesubranges[:-1] # pop the top
slices.append(top)
return slices
def _slicesrangeatmerge(self, repo, rangeid, globalindex):
localindex = globalindex - rangeid[1]
cl = repo.changelog
result = []
allrevs = self.revsfromrange(repo, rangeid)
bottomrevs = allrevs[:localindex]
if globalindex == self.depthrev(repo, bottomrevs[-1]):
# simple case, top revision in the bottom set contains exactly the
# revision we needs
result.append((bottomrevs[-1], rangeid[1]))
else:
parentrevs = cl.parentrevs
parents = self._parents
bheads = set(bottomrevs)
du = bheads.difference_update
reachableroots = repo.changelog.reachableroots
for r in bottomrevs:
du(parents(r, parentrevs))
for h in bheads:
# reachable roots is fast because is C
#
# It is worth noting that will use this kind of filtering from
# "h" multiple time in a warming run. So using "ancestors" and
# caching that should be faster. But python code filtering on
# the ancestors end up being slower.
hrevs = reachableroots(bottomrevs[0], [h], bottomrevs, True)
start = self.depthrev(repo, h) - len(hrevs)
entry = (h, start)
result.append(entry)
# Talking about python code being slow, the following code is an
# alternative implementation.
#
# It complexity is better since is does a single traversal on the
# bottomset. However since it is all python it end up being
# slower.
# I'm keeping it here as an inspiration for a future C version
# branches = []
# for current in reversed(bottomrevs):
# ps = parents(current, parentrevs)
# found = False
# for brevs, bexpect in branches:
# if current in bexpect:
# found = True
# brevs.append(current)
# bexpect.discard(current)
# bexpect.update(ps)
# if not found:
# branches.append(([current], set(ps)))
# for revs, __ in reversed(branches):
# head = revs[0]
# index = self.depthrev(repo, head) - len(revs)
# result.append((head, index))
# top part is trivial
top = (rangeid[0], globalindex)
result.append(top)
return result
@eh.reposetup
def setupcache(ui, repo):
class stablerangerepo(repo.__class__):
@localrepo.unfilteredpropertycache
def stablerange(self):
return stablerange()
repo.__class__ = stablerangerepo