stablerange: remove the now unused individual range class
That class is no longer necessary; we dropped its usage for performance reasons.
# Code dedicated to the computation and properties of "stable ranges"
#
# These stable ranges are used for obsolescence marker discovery
#
# Copyright 2017 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import collections
import math
from mercurial import (
commands,
cmdutil,
localrepo,
node as nodemod,
scmutil,
)
from mercurial.i18n import _
from . import (
exthelper,
)
eh = exthelper.exthelper()
##################################
### Stable topological sorting ###
##################################
@eh.command(
    'debugstablesort',
    [
        ('', 'rev', [], 'heads to start from'),
    ] + commands.formatteropts,
    _(''))
def debugstablesort(ui, repo, **opts):
    """display the ::REVS set topologically sorted in a stable way
    """
    # resolve the user-supplied revset, then render every changeset of
    # '::revs' in stable order through the standard changeset displayer
    revs = scmutil.revrange(repo, opts['rev'])
    displayer = cmdutil.show_changeset(ui, repo, opts, buffered=True)
    for rev in stablesort(repo, revs):
        changectx = repo[rev]
        displayer.show(changectx)
        displayer.flush(changectx)
    displayer.close()
def stablesort(repo, revs):
    """return '::revs' topologically sorted in "stable" order

    This is a depth first traversal starting from 'nullrev', using node as a
    tie breaker.

    note: assumes 'revs' is non-empty ('max(revs)' below would raise on an
    empty input) — callers filter that case out; verify before reusing.
    """
    # Various notes:
    #
    # * Bitbucket used dates as tie breaker, that might be a good idea.
    #
    # * It seems we can traverse in the same order from (one) head to bottom,
    #   if we record the following data for each merge:
    #
    #   - highest (stablesort-wise) common ancestors,
    #   - order of parents (stablesort-wise)
    cl = repo.changelog
    parents = cl.parentrevs
    nullrev = nodemod.nullrev
    n = cl.node
    # step 1: We need a parents -> children mapping for 2 reasons.
    #
    # * we build the order from nullrev to tip
    #
    # * we need to detect branching
    children = collections.defaultdict(list)
    for r in cl.ancestors(revs, inclusive=True):
        p1, p2 = parents(r)
        children[p1].append(r)
        if p2 != nullrev:
            children[p2].append(r)
    # step two: walk back up
    # * pick lowest node in case of branching
    # * stack disregarded part of the branching
    # * process merge when both parents are yielded
    # track which changesets have been emitted already
    # (seen[-1] aliases index 'max(revs) + 1', reserved for nullrev)
    seen = [0] * (max(revs) + 2)
    seen[-1] = True # nullrev is known
    # starts from repository roots
    # reuse the list from the mapping as we won't need it again anyway
    stack = children[nullrev]
    if not stack:
        return []
    if 1 < len(stack):
        stack.sort(key=n, reverse=True)
    # list of revs; maybe we should yield, but since we built a children
    # mapping we are 'O(N)' already
    result = []
    current = stack.pop()
    while current is not None or stack:
        if current is None:
            # previous iteration reached a merge or an unready merge,
            # resume from the pending branches
            current = stack.pop()
            if seen[current]:
                # merges can be stacked once per parent; skip duplicates
                current = None
                continue
        p1, p2 = parents(current)
        if not (seen[p1] and seen[p2]):
            # we can't iterate on this merge yet because other child is not
            # yielded yet (and we are topo sorting) we can discard it for now
            # because it will be reached from the other child.
            current = None
            continue
        assert not seen[current]
        seen[current] = True
        result.append(current) # could be yield, cf earlier comment
        cs = children[current]
        if not cs:
            # dead end: fall back to the stacked branches
            current = None
        elif 1 == len(cs):
            # linear segment: just follow it
            current = cs[0]
        else:
            cs.sort(key=n, reverse=True)
            current = cs.pop() # proceed on smallest
            stack.extend(cs) # stack the rest for later
    assert len(result) == len(set(result))
    return result
#################################
### Stable Range computation ###
#################################
def subrangesclosure(repo, heads):
    """set of all standard subranges under 'heads'

    Intended for debugging purposes. Ranges are returned from the largest to
    the smallest in terms of the number of revisions they contain."""
    subranges = repo.stablerange.subranges
    # worklist of ranges whose subranges still need to be expanded
    pending = [(h, 0) for h in heads]
    known = set(pending)
    while pending:
        rangeid = pending.pop()
        for sub in subranges(repo, rangeid):
            if sub not in known:
                known.add(sub)
                pending.append(sub)
    node = repo.changelog.node
    length = repo.stablerange.rangelength
    # biggest first, ties broken by the head node
    return sorted(known, key=lambda r: (-length(repo, r), node(r[0])))
class stablerangecache(dict):
    """cache for the computation of "stable range" properties

    A range is identified by a '(head, index)' pair: the range contains the
    revisions of the stable-sorted '::head' set from position 'index'
    onward.  This object memoizes the expensive pieces of that computation:
    revision depth, standard subranges, stable sort order and range
    membership.
    """

    def __init__(self):
        # rev -> depth (number of ancestors, inclusive); see 'depthrev'
        self._depthcache = {}
        # rangeid -> list of standard subranges; see 'subranges'
        self._subrangescache = {}
        # To slice merges, we need to walk their descendants in reverse
        # stable sort order. For now we perform a full stable sort of their
        # descendants and then use the relevant top most part. This order is
        # going to be the same for all ranges headed at the same merge. So we
        # cache these values to reuse them across the same invocation.
        self._stablesortcache = {}
        # if we already know all the revisions that belong to a range, it is
        # quite trivial to have the subrange "inherit" that knowledge. This
        # cache is dedicated to hold the full list of revs inside a subrange
        # when we happen to know it.
        #
        # For example, if we are slicing a range headed by a merge we will
        # have the revisions computed anyway, and passing that knowledge
        # around might help to slice one of its subranges also containing a
        # merge.
        self._revsinrangecache = {}

    def warmup(self, repo, heads):
        """warm the cache up to 'heads'"""
        # computing the depth walks every ancestor, filling the depth cache
        for r in repo.revs("::%ld", heads):
            self.depthrev(repo, r)

    def depthrev(self, repo, rev):
        """return the depth of 'rev': the number of its ancestors, inclusive

        Iterative (explicit stack) to avoid recursion limits on long
        histories; results are memoized in 'self._depthcache'.
        """
        repo = repo.unfiltered()
        cl = repo.changelog
        cache = self._depthcache
        nullrev = nodemod.nullrev
        stack = [rev]
        while stack:
            revdepth = None
            current = stack[-1]
            revdepth = cache.get(current)
            if revdepth is not None:
                # already computed (possibly scheduled twice); drop it
                stack.pop()
                continue
            p1, p2 = cl.parentrevs(current)
            if p1 == nullrev:
                # root case
                revdepth = 1
            elif p2 == nullrev:
                # linear commit case
                parentdepth = cache.get(p1)
                if parentdepth is None:
                    # parent depth unknown: compute it first, retry 'current'
                    stack.append(p1)
                else:
                    revdepth = parentdepth + 1
            else:
                # merge case
                revdepth = self._depthmerge(cl, current, p1, p2, stack, cache)
            if revdepth is not None:
                cache[current] = revdepth
                stack.pop()
        # actual_depth = len(list(cl.ancestors([rev], inclusive=True)))
        # assert revdepth == actual_depth, (rev, revdepth, actual_depth)
        return revdepth

    def rangelength(self, repo, rangeid):
        """return the number of revisions in the '(head, index)' range"""
        headrev, index = rangeid[0], rangeid[1]
        return self.depthrev(repo, headrev) - index

    def subranges(self, repo, rangeid):
        """return the list of standard subranges of 'rangeid' (cached)"""
        cached = self._subrangescache.get(rangeid)
        if cached is not None:
            return cached
        if self.rangelength(repo, rangeid) == 1:
            # a single-revision range cannot be sliced further
            value = []
        else:
            slicepoint = self._slicepoint(repo, rangeid)
            value = self._slicesrangeat(repo, rangeid, slicepoint)
        self._subrangescache[rangeid] = value
        return value

    def revsfromrange(self, repo, rangeid):
        """return the list of revisions contained in 'rangeid'"""
        revs = self._revsinrangecache.get(rangeid)
        if revs is None:
            if self.rangelength(repo, rangeid) == 1:
                revs = [rangeid[0]]
            else:
                # get all revs under heads in stable order
                #
                # note: In the general case we could just walk down and then
                # request data about the merge. But I'm not sure this
                # function will even be called in the general case.
                allrevs = self._stablesortcache.get(rangeid[0])
                if allrevs is None:
                    allrevs = stablesort(repo, [rangeid[0]])
                    self._stablesortcache[rangeid[0]] = allrevs
                # takes from index
                revs = allrevs[rangeid[1]:]
            self._revsinrangecache[rangeid] = revs
        # sanity checks
        assert len(revs) == self.rangelength(repo, rangeid)
        return revs

    @staticmethod
    def _depthmerge(cl, rev, p1, p2, stack, cache):
        # sub method to simplify the main 'depthrev' one
        #
        # Returns the depth of merge 'rev', or None when some needed
        # ancestor depth is still missing (after scheduling it on 'stack').
        revdepth = None
        depth_p1 = cache.get(p1)
        depth_p2 = cache.get(p2)
        missingparent = False
        if depth_p1 is None:
            stack.append(p1)
            missingparent = True
        if depth_p2 is None:
            stack.append(p2)
            missingparent = True
        if missingparent:
            return None
        # computing depth of a merge
        # XXX the common ancestors heads could be cached
        ancnodes = cl.commonancestorsheads(cl.node(p1), cl.node(p2))
        ancrevs = [cl.rev(a) for a in ancnodes]
        anyunkown = False
        ancdepth = []
        for r in ancrevs:
            d = cache.get(r)
            if d is None:
                # ancestor depth unknown yet; schedule it and retry later
                anyunkown = True
                stack.append(r)
            ancdepth.append((r, d))
        if anyunkown:
            return None
        if not ancrevs:
            # unrelated branch, (no common root)
            revdepth = depth_p1 + depth_p2 + 1
        elif len(ancrevs) == 1:
            # one unique branch point:
            # we can compute depth without any walk
            depth_anc = ancdepth[0][1]
            revdepth = depth_p1 + (depth_p2 - depth_anc) + 1
        else:
            # multiple ancestors, we pick one that is
            # * the deepest (less changesets outside of it),
            # * lowest revs because more chance to have descendants of the
            #   others "above" it
            anc, revdepth = max(ancdepth, key=lambda x: (x[1], -x[0]))
            revdepth += len(cl.findmissingrevs(common=[anc], heads=[rev]))
        return revdepth

    def _slicepoint(self, repo, rangeid):
        """return the global index at which 'rangeid' must be sliced

        Walks power-of-two steps down from the range depth to find the
        "standard" cut point for this '(head, index)' pair.
        """
        rangedepth = self.depthrev(repo, rangeid[0])
        step = _hlp2(rangedepth)
        standard_start = 0
        while standard_start < rangeid[1] and 0 < step:
            if standard_start + step < rangedepth:
                standard_start += step
            step //= 2
        if rangeid[1] == standard_start:
            # the range starts on a standard boundary: cut after the largest
            # power-of-two-sized bottom slice
            slicesize = _hlp2(self.rangelength(repo, rangeid))
            slicepoint = rangeid[1] + slicesize
        else:
            assert standard_start < rangedepth
            slicepoint = standard_start
        return slicepoint

    def _slicesrangeat(self, repo, rangeid, globalindex):
        """slice 'rangeid' (non-merge head) at 'globalindex'

        Returns the list of resulting subranges, bottom first.
        """
        p1, p2 = repo.changelog.parentrevs(rangeid[0])
        if p2 != nodemod.nullrev:
            # merge head: needs the dedicated (more expensive) logic
            return self._slicesrangeatmerge(repo, rangeid, globalindex)
        assert p1 != nodemod.nullrev
        rangedepth = self.depthrev(repo, rangeid[0])
        topsize = rangedepth - globalindex
        parentrange = (p1, rangeid[1])
        # if we have an entry for the current range, let's update the cache
        if rangeid in self._revsinrangecache:
            # drop the head itself: the parent range holds the rest
            parentrevs = self._revsinrangecache[rangeid][:-1]
            self._revsinrangecache[parentrange] = parentrevs
        if topsize == 1:
            top = (rangeid[0], globalindex)
            return [parentrange, top]
        else:
            # XXX recursive call, python has issues with them
            # The best way to break it would be directly 'self.subranges'
            # In that other method we could make sure subranges for
            # (p1(rev), idx) are available before slicing (rev, idx). But the
            # heavy object approach makes it a bit inconvenient so that will
            # wait for that heavy object to be gone.
            parentsubranges = self.subranges(repo, parentrange)
            slices = parentsubranges[:-1] # pop the top
            top = (rangeid[0], globalindex)
            # if we have an entry for the current range, let's update the cache
            if rangeid in self._revsinrangecache:
                # NOTE(review): 'parentrevs' is misnamed here — this slice
                # holds the revs of the new *top* range, which is what gets
                # cached; behavior is correct, only the name misleads.
                parentrevs = self._revsinrangecache[rangeid][-topsize:]
                self._revsinrangecache[top] = parentrevs
            slices.append(top)
            return slices

    def _slicesrangeatmerge(self, repo, rangeid, globalindex):
        """slice 'rangeid' (merge head) at 'globalindex'

        The bottom part may have multiple heads, producing one subrange per
        bottom head; returns the subranges, bottom ones first, top last.
        """
        localindex = globalindex - rangeid[1]
        cl = repo.changelog
        result = []
        allrevs = self.revsfromrange(repo, rangeid)
        toprevs = allrevs[localindex:]
        bottomrevs = allrevs[:localindex]
        top = (rangeid[0], globalindex)
        self._revsinrangecache[top] = toprevs # update cache
        #
        rangedepth = self.depthrev(repo, rangeid[0])
        toprootdepth = self.depthrev(repo, toprevs[0])
        if toprootdepth + self.rangelength(repo, top) == rangedepth + 1:
            # the top is a linear tail of the range: the bottom necessarily
            # has a single head, its last revision in stable order
            bheads = [bottomrevs[-1]]
        else:
            # compute the bottom heads: bottom revs that are nobody's parent
            bheads = set(bottomrevs)
            parentrevs = cl.parentrevs
            du = bheads.difference_update
            for r in bottomrevs:
                du(parentrevs(r))
            # if len(bheads) == 1:
            #     assert 1 == len(repo.revs('roots(%ld)', top._revs))
        if len(bheads) == 1:
            newhead = bottomrevs[-1]
            bottomdepth = self.depthrev(repo, newhead)
            newstart = bottomdepth - len(bottomrevs)
            bottom = (newhead, newstart)
            self._revsinrangecache[bottom] = bottomrevs # update cache
            result.append(bottom)
        else:
            # assert 1 < len(bheads), (toprootdepth, len(top), len(rangeid))
            cl = repo.changelog
            for h in bheads:
                subset = cl.ancestors([h], inclusive=True)
                hrevs = [r for r in bottomrevs if r in subset]
                start = self.depthrev(repo, h) - len(hrevs)
                entry = (h, start)
                # NOTE(review): 'entryrevs' recomputes the same list as
                # 'hrevs' above; harmless duplication, kept as-is
                entryrevs = [r for r in bottomrevs if r in subset]
                self._revsinrangecache[entry] = entryrevs # update cache
                result.append(entry)
        result.append(top)
        return result
def _hlp2(i):
"""return highest power of two lower than 'i'"""
return 2 ** int(math.log(i - 1, 2))
@eh.reposetup
def setupcache(ui, repo):
    """install a lazily-built 'stablerange' cache property on 'repo'"""
    base = repo.__class__

    class stablerangerepo(base):
        @localrepo.unfilteredpropertycache
        def stablerange(self):
            # built on first access, shared by all filtered views
            return stablerangecache()

    repo.__class__ = stablerangerepo