stablerange: use rangelength in subrangesclosure
We stop using the building '__len__' this get use closer to be able to use a
tuple.
# Code dedicated to the computation and properties of "stable ranges"
#
# These stable ranges are use for obsolescence markers discovery
#
# Copyright 2017 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import collections
import math
from mercurial import (
commands,
cmdutil,
localrepo,
node as nodemod,
scmutil,
util,
)
from mercurial.i18n import _
from . import (
exthelper,
)
eh = exthelper.exthelper()
##################################
### Stable topological sorting ###
##################################
@eh.command(
'debugstablesort',
[
('', 'rev', [], 'heads to start from'),
] + commands.formatteropts,
_(''))
def debugstablesort(ui, repo, **opts):
"""display the ::REVS set topologically sorted in a stable way
"""
revs = scmutil.revrange(repo, opts['rev'])
displayer = cmdutil.show_changeset(ui, repo, opts, buffered=True)
for r in stablesort(repo, revs):
ctx = repo[r]
displayer.show(ctx)
displayer.flush(ctx)
displayer.close()
def stablesort(repo, revs):
"""return '::revs' topologically sorted in "stable" order
This is a depth first traversal starting from 'nullrev', using node as a
tie breaker.
"""
# Various notes:
#
# * Bitbucket is used dates as tie breaker, that might be a good idea.
#
# * It seemds we can traverse in the same order from (one) head to bottom,
# if we the following record data for each merge:
#
# - highest (stablesort-wise) common ancestors,
# - order of parents (tablesort-wise)
cl = repo.changelog
parents = cl.parentrevs
nullrev = nodemod.nullrev
n = cl.node
# step 1: We need a parents -> children mapping for 2 reasons.
#
# * we build the order from nullrev to tip
#
# * we need to detect branching
children = collections.defaultdict(list)
for r in cl.ancestors(revs, inclusive=True):
p1, p2 = parents(r)
children[p1].append(r)
if p2 != nullrev:
children[p2].append(r)
# step two: walk back up
# * pick lowest node in case of branching
# * stack disregarded part of the branching
# * process merge when both parents are yielded
# track what changeset has been
seen = [0] * (max(revs) + 2)
seen[-1] = True # nullrev is known
# starts from repository roots
# reuse the list form the mapping as we won't need it again anyway
stack = children[nullrev]
if not stack:
return []
if 1 < len(stack):
stack.sort(key=n, reverse=True)
# list of rev, maybe we should yield, but since we built a children mapping we are 'O(N)' already
result = []
current = stack.pop()
while current is not None or stack:
if current is None:
# previous iteration reached a merge or an unready merge,
current = stack.pop()
if seen[current]:
current = None
continue
p1, p2 = parents(current)
if not (seen[p1] and seen[p2]):
# we can't iterate on this merge yet because other child is not
# yielded yet (and we are topo sorting) we can discard it for now
# because it will be reached from the other child.
current = None
continue
assert not seen[current]
seen[current] = True
result.append(current) # could be yield, cf earlier comment
cs = children[current]
if not cs:
current = None
elif 1 == len(cs):
current = cs[0]
else:
cs.sort(key=n, reverse=True)
current = cs.pop() # proceed on smallest
stack.extend(cs) # stack the rest for later
assert len(result) == len(set(result))
return result
#################################
### Stable Range computation ###
#################################
def subrangesclosure(repo, heads):
"""set of all standard subrange under heads
This is intended for debug purposes. Range are returned from largest to
smallest in terms of number of revision it contains."""
toproceed = [stablerange(repo, r, 0, ) for r in heads]
ranges = set(toproceed)
while toproceed:
entry = toproceed.pop()
for r in entry.subranges():
if r not in ranges:
ranges.add(r)
toproceed.append(r)
ranges = list(ranges)
n = repo.changelog.node
rangelength = repo.stablerange.rangelength
ranges.sort(key=lambda r: (-rangelength(repo, r), n(r[0])))
return ranges
class stablerangecache(dict):
def __init__(self):
self._depthcache = {}
self._subrangescache = {}
def warmup(self, repo, heads):
"""warm the cache up to 'heads'"""
for r in repo.revs("::%ld", heads):
self.depthrev(repo, r)
def depthrev(self, repo, rev):
repo = repo.unfiltered()
cl = repo.changelog
cache = self._depthcache
nullrev = nodemod.nullrev
stack = [rev]
while stack:
revdepth = None
current = stack[-1]
revdepth = cache.get(current)
if revdepth is not None:
stack.pop()
continue
p1, p2 = cl.parentrevs(current)
if p1 == nullrev:
# root case
revdepth = 1
elif p2 == nullrev:
# linear commit case
parentdepth = cache.get(p1)
if parentdepth is None:
stack.append(p1)
else:
revdepth = parentdepth + 1
else:
# merge case
revdepth = self._depthmerge(cl, current, p1, p2, stack, cache)
if revdepth is not None:
cache[current] = revdepth
stack.pop()
# actual_depth = len(list(cl.ancestors([rev], inclusive=True)))
# assert revdepth == actual_depth, (rev, revdepth, actual_depth)
return revdepth
@staticmethod
def _depthmerge(cl, rev, p1, p2, stack, cache):
# sub method to simplify the main 'depthrev' one
revdepth = None
depth_p1 = cache.get(p1)
depth_p2 = cache.get(p2)
missingparent = False
if depth_p1 is None:
stack.append(p1)
missingparent = True
if depth_p2 is None:
stack.append(p2)
missingparent = True
if missingparent:
return None
# computin depth of a merge
# XXX the common ancestors heads could be cached
ancnodes = cl.commonancestorsheads(cl.node(p1), cl.node(p2))
ancrevs = [cl.rev(a) for a in ancnodes]
anyunkown = False
ancdepth = []
for r in ancrevs:
d = cache.get(r)
if d is None:
anyunkown = True
stack.append(r)
ancdepth.append((r, d))
if anyunkown:
return None
if not ancrevs:
# unrelated branch, (no common root)
revdepth = depth_p1 + depth_p2 + 1
elif len(ancrevs) == 1:
# one unique branch point:
# we can compute depth without any walk
depth_anc = ancdepth[0][1]
revdepth = depth_p1 + (depth_p2 - depth_anc) + 1
else:
# multiple ancestors, we pick one that is
# * the deepest (less changeset outside of it),
# * lowest revs because more chance to have descendant of other "above"
anc, revdepth = max(ancdepth, key=lambda x: (x[1], -x[0]))
revdepth += len(cl.findmissingrevs(common=[anc], heads=[rev]))
return revdepth
def rangelength(self, repo, rangeid):
headrev, index = rangeid[0], rangeid[1]
return self.depthrev(repo, headrev) - index
def subranges(self, repo, rangeid):
cached = self._subrangescache.get(rangeid)
if cached is not None:
return cached
if self.rangelength(repo, rangeid) == 1:
value = []
else:
slicepoint = self._slicepoint(repo, rangeid)
value = self._slicesrangeat(repo, rangeid, slicepoint)
self._subrangescache[rangeid] = value
return value
def _slicepoint(self, repo, rangeid):
rangedepth = self.depthrev(repo, rangeid[0])
step = _hlp2(rangedepth)
standard_start = 0
while standard_start < rangeid[1] and 0 < step:
if standard_start + step < rangedepth:
standard_start += step
step //= 2
if rangeid[1] == standard_start:
slicesize = _hlp2(len(rangeid))
slicepoint = rangeid[1] + slicesize
else:
assert standard_start < rangedepth
slicepoint = standard_start
return slicepoint
def _slicesrangeat(self, repo, rangeid, globalindex):
p1, p2 = repo.changelog.parentrevs(rangeid[0])
if p2 != nodemod.nullrev:
return self._slicesrangeatmerge(repo, rangeid, globalindex)
assert p1 != nodemod.nullrev
rangedepth = self.depthrev(repo, rangeid[0])
topsize = rangedepth - globalindex
parentrange = stablerange(repo, p1, rangeid[1], rangeid._revs[:-1])
if topsize == 1:
top = stablerange(repo, rangeid[0], globalindex, [rangeid[0]])
return [parentrange, top]
else:
# XXX recursive call, python have issue with them
# The best way to break it would be directly 'self.subranges'
# In that other method we could make sure subrangess for
# (p1(rev), idx) are available before slicing (rev, idx). But the
# heavy object approach makes it a bit inconvenient so that will
# wait for that heavy object to be gone.
parentsubranges = self.subranges(repo, parentrange)
slices = parentsubranges[:-1] # pop the top
top = stablerange(repo, rangeid[0], globalindex, rangeid._revs[-topsize:])
slices.append(top)
return slices
@staticmethod
def _slicesrangeatmerge(repo, rangeid, globalindex):
localindex = globalindex - rangeid[1]
cl = repo.changelog
result = []
bottom = rangeid._revs[:localindex]
top = stablerange(repo, rangeid[0], globalindex, rangeid._revs[localindex:])
#
toprootdepth = repo.stablerange.depthrev(repo, top._revs[0])
if toprootdepth + len(top) == rangeid.depth + 1:
bheads = [bottom[-1]]
else:
bheads = set(bottom)
parentrevs = cl.parentrevs
du = bheads.difference_update
for r in bottom:
du(parentrevs(r))
# if len(bheads) == 1:
# assert 1 == len(repo.revs('roots(%ld)', top._revs))
if len(bheads) == 1:
newhead = bottom[-1]
bottomdepth = repo.stablerange.depthrev(repo, newhead)
newstart = bottomdepth - len(bottom)
result.append(stablerange(repo, newhead, newstart, bottom))
else:
# assert 1 < len(bheads), (toprootdepth, len(top), len(rangeid))
cl = repo.changelog
for h in bheads:
subset = cl.ancestors([h], inclusive=True)
hrevs = [r for r in bottom if r in subset]
start = repo.stablerange.depthrev(repo, h) - len(hrevs)
entry = stablerange(repo, h, start, [r for r in bottom if r in subset])
result.append(entry)
result.append(top)
return result
def _hlp2(i):
"""return highest power of two lower than 'i'"""
return 2 ** int(math.log(i - 1, 2))
class stablerange(object):
def __init__(self, repo, head, index, revs=None):
self._repo = repo.unfiltered()
self._head = head
self._index = index
if revs is not None:
assert len(revs) == len(self)
self._revs = revs
assert index < self.depth, (head, index, self.depth, revs)
def __repr__(self):
nodehead = self._repo.changelog.node(self[0])
return '%s %d %d %s' % (nodemod.short(nodehead), self.depth, self[1], nodemod.short(self.obshash))
def __hash__(self):
return self._id
def __eq__(self, other):
if type(self) != type(other):
raise NotImplementedError()
return self._stablekey == other._stablekey
def __getitem__(self, idx):
"""small helper function to prepare for the migration to tuple"""
if idx == 0:
return self._head
elif idx == 1:
return self._index
else:
raise IndexError(idx)
@util.propertycache
def _id(self):
return hash(self._stablekey)
@util.propertycache
def _stablekey(self):
return (self[0], self[1])
def __len__(self):
return self.depth - self[1]
@util.propertycache
def depth(self):
return self._repo.stablerange.depthrev(self._repo, self[0])
@util.propertycache
def _revs(self):
r = stablesort(self._repo, [self[0]])[self[1]:]
assert len(r) == len(self), (self[0], self[1], len(r), len(self))
return r
def subranges(self):
return self._repo.stablerange.subranges(self._repo, self)
@eh.reposetup
def setupcache(ui, repo):
class stablerangerepo(repo.__class__):
@localrepo.unfilteredpropertycache
def stablerange(self):
return stablerangecache()
repo.__class__ = stablerangerepo