stablerange: change the key to use the revision number
Using the node is more stable, but for now we do not have on-disk caching and
the revision number should be fine in memory. This makes the data used for the
file cache closer to what it will be when we use tuples.
We might reintroduce the node in the future, but let us keep it simple for now.
# Code dedicated to the computation and properties of "stable ranges"
#
# These stable ranges are use for obsolescence markers discovery
#
# Copyright 2017 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import collections
import math
from mercurial import (
commands,
cmdutil,
localrepo,
node as nodemod,
scmutil,
util,
)
from mercurial.i18n import _
from . import (
exthelper,
)
eh = exthelper.exthelper()
##################################
### Stable topological sorting ###
##################################
@eh.command(
'debugstablesort',
[
('', 'rev', [], 'heads to start from'),
] + commands.formatteropts,
_(''))
def debugstablesort(ui, repo, **opts):
"""display the ::REVS set topologically sorted in a stable way
"""
revs = scmutil.revrange(repo, opts['rev'])
displayer = cmdutil.show_changeset(ui, repo, opts, buffered=True)
for r in stablesort(repo, revs):
ctx = repo[r]
displayer.show(ctx)
displayer.flush(ctx)
displayer.close()
def stablesort(repo, revs):
"""return '::revs' topologically sorted in "stable" order
This is a depth first traversal starting from 'nullrev', using node as a
tie breaker.
"""
# Various notes:
#
# * Bitbucket uses dates as the tie breaker; that might be a good idea.
#
# * It seems we can traverse in the same order from (one) head to bottom,
#   if we record the following data for each merge:
#
#   - highest (stablesort-wise) common ancestors,
#   - order of parents (stablesort-wise)
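# A rough illustration of the intended order (hypothetical revision numbers,
# assuming the node of 2 sorts before the node of 3):
#
#     4
#     |\
#     2 3
#     |/
#     1
#     |
#     0
#
# stablesort(repo, [4]) would yield [0, 1, 2, 3, 4]: the traversal dives into
# 2 first (smaller node), stacks 3, and only emits the merge 4 once both of
# its parents have been emitted.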
cl = repo.changelog
parents = cl.parentrevs
nullrev = nodemod.nullrev
n = cl.node
# step 1: We need a parents -> children mapping for 2 reasons.
#
# * we build the order from nullrev to tip
#
# * we need to detect branching
children = collections.defaultdict(list)
for r in cl.ancestors(revs, inclusive=True):
p1, p2 = parents(r)
children[p1].append(r)
if p2 != nullrev:
children[p2].append(r)
# step 2: walk back up
# * pick the lowest node in case of branching
# * stack the disregarded part of the branching
# * process a merge only when both parents have been yielded
# track which changesets have been seen already
seen = [0] * (max(revs) + 2)
seen[-1] = True # nullrev is known
# start from the repository roots
# reuse the list from the mapping as we won't need it again anyway
stack = children[nullrev]
if not stack:
return []
if 1 < len(stack):
stack.sort(key=n, reverse=True)
# list of revs; maybe we should yield instead, but since we already built a children mapping we are 'O(N)' in memory anyway
result = []
current = stack.pop()
while current is not None or stack:
if current is None:
# previous iteration reached a dead end or an unready merge,
current = stack.pop()
if seen[current]:
current = None
continue
p1, p2 = parents(current)
if not (seen[p1] and seen[p2]):
# we cannot process this merge yet because its other parent has not
# been yielded yet (and we are topo sorting); we can discard it for
# now because it will be reached again from that other parent.
current = None
continue
assert not seen[current]
seen[current] = True
result.append(current) # could be yield, cf earlier comment
cs = children[current]
if not cs:
current = None
elif 1 == len(cs):
current = cs[0]
else:
cs.sort(key=n, reverse=True)
current = cs.pop() # proceed on smallest
stack.extend(cs) # stack the rest for later
assert len(result) == len(set(result))
return result
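# Example usage (hypothetical revisions): sorting everything reachable from
# the current working copy parent:
#
#   revs = stablesort(repo, [repo['.'].rev()])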
#################################
### Stable Range computation ###
#################################
def subrangesclosure(repo, heads):
"""set of all standard subrange under heads
This is intended for debug purposes. Range are returned from largest to
smallest in terms of number of revision it contains."""
toproceed = [stablerange(repo, r, 0) for r in heads]
ranges = set(toproceed)
while toproceed:
entry = toproceed.pop()
for r in entry.subranges():
if r not in ranges:
ranges.add(r)
toproceed.append(r)
ranges = list(ranges)
n = repo.changelog.node
ranges.sort(key=lambda r: (-len(r), n(r[0])))
return ranges
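# Typical debug usage (hypothetical): subrangesclosure(repo, [repo['tip'].rev()])
# returns every standard subrange under tip, largest ranges first.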
class stablerangecache(dict):
def __init__(self):
self._depthcache = {}
self._subrangescache = {}
def warmup(self, repo, heads):
"""warm the cache up to 'heads'"""
for r in repo.revs("::%ld", heads):
self.depthrev(repo, r)
def depthrev(self, repo, rev):
repo = repo.unfiltered()
cl = repo.changelog
cache = self._depthcache
nullrev = nodemod.nullrev
stack = [rev]
while stack:
revdepth = None
current = stack[-1]
revdepth = cache.get(current)
if revdepth is not None:
stack.pop()
continue
p1, p2 = cl.parentrevs(current)
if p1 == nullrev:
# root case
revdepth = 1
elif p2 == nullrev:
# linear commit case
parentdepth = cache.get(p1)
if parentdepth is None:
stack.append(p1)
else:
revdepth = parentdepth + 1
else:
# merge case
revdepth = self._depthmerge(cl, current, p1, p2, stack, cache)
if revdepth is not None:
cache[current] = revdepth
stack.pop()
# actual_depth = len(list(cl.ancestors([rev], inclusive=True)))
# assert revdepth == actual_depth, (rev, revdepth, actual_depth)
return revdepth
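# A quick sanity example (hypothetical revisions): in a linear history
# 0 <- 1 <- 2, depthrev(repo, 2) == 3, i.e. the revision itself plus its two
# ancestors (roots have depth 1, each linear child adds 1).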
@staticmethod
def _depthmerge(cl, rev, p1, p2, stack, cache):
# sub method to simplify the main 'depthrev' one
revdepth = None
depth_p1 = cache.get(p1)
depth_p2 = cache.get(p2)
missingparent = False
if depth_p1 is None:
stack.append(p1)
missingparent = True
if depth_p2 is None:
stack.append(p2)
missingparent = True
if missingparent:
return None
# computing the depth of a merge
# XXX the common ancestors heads could be cached
ancnodes = cl.commonancestorsheads(cl.node(p1), cl.node(p2))
ancrevs = [cl.rev(a) for a in ancnodes]
anyunknown = False
ancdepth = []
for r in ancrevs:
d = cache.get(r)
if d is None:
anyunknown = True
stack.append(r)
ancdepth.append((r, d))
if anyunknown:
return None
if not ancrevs:
# unrelated branches (no common root)
revdepth = depth_p1 + depth_p2 + 1
elif len(ancrevs) == 1:
# one unique branch point:
# we can compute depth without any walk
depth_anc = ancdepth[0][1]
revdepth = depth_p1 + (depth_p2 - depth_anc) + 1
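# For instance (hypothetical numbers): with depth_p1 == 5, depth_p2 == 4
# and a single branch point of depth 3, the merge covers the 5 ancestors
# of p1, the 4 - 3 = 1 changeset exclusive to p2, plus itself:
# 5 + (4 - 3) + 1 == 7.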
else:
# multiple ancestors, we pick one that is:
# * the deepest (fewer changesets outside of it),
# * the lowest rev, because it has more chance to have descendants of the
#   others "above" it
anc, revdepth = max(ancdepth, key=lambda x: (x[1], -x[0]))
revdepth += len(cl.findmissingrevs(common=[anc], heads=[rev]))
return revdepth
def rangelength(self, repo, rangeid):
headrev, index = rangeid.head, rangeid.index
return self.depthrev(repo, headrev) - index
def subranges(self, repo, rangeid):
cached = self._subrangescache.get(rangeid)
if cached is not None:
return cached
if self.rangelength(repo, rangeid) == 1:
value = []
else:
slicepoint = self._slicepoint(repo, rangeid)
value = self._slicesrangeat(repo, rangeid, slicepoint)
self._subrangescache[rangeid] = value
return value
def _slicepoint(self, repo, rangeid):
rangedepth = self.depthrev(repo, rangeid.head)
step = _hlp2(rangedepth)
standard_start = 0
while standard_start < rangeid.index and 0 < step:
if standard_start + step < rangedepth:
standard_start += step
step //= 2
if rangeid.index == standard_start:
slicesize = _hlp2(len(rangeid))
slicepoint = rangeid.index + slicesize
else:
assert standard_start < rangedepth
slicepoint = standard_start
return slicepoint
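# A rough illustration (hypothetical values): for a range starting at
# index 0 whose head has depth 10, the loop leaves standard_start at 0,
# so slicesize = _hlp2(10) == 8 and the range is cut at global index 8:
# an 8-revision bottom range and a 2-revision top range.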
def _slicesrangeat(self, repo, rangeid, globalindex):
p1, p2 = repo.changelog.parentrevs(rangeid.head)
if p2 != nodemod.nullrev:
return self._slicesrangeatmerge(repo, rangeid, globalindex)
assert p1 != nodemod.nullrev
rangedepth = self.depthrev(repo, rangeid.head)
topsize = rangedepth - globalindex
parentrange = stablerange(repo, p1, rangeid.index, rangeid._revs[:-1])
if topsize == 1:
top = stablerange(repo, rangeid.head, globalindex, [rangeid.head])
return [parentrange, top]
else:
# XXX recursive call, python has issues with deep recursion.
# The best way to break it would be to go through 'self.subranges'
# directly. In that other method we could make sure subranges for
# (p1(rev), idx) are available before slicing (rev, idx). But the
# heavy object approach makes it a bit inconvenient, so that will
# wait for that heavy object to be gone.
parentsubranges = self.subranges(repo, parentrange)
slices = parentsubranges[:-1] # pop the top
top = stablerange(repo, rangeid.head, globalindex, rangeid._revs[-topsize:])
slices.append(top)
return slices
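# For example (hypothetical): slicing a linear range (head, 0) of depth 5
# at globalindex == 4 gives topsize == 1, so the result is the parent
# range (p1, 0) followed by the one-revision top range (head, 4).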
@staticmethod
def _slicesrangeatmerge(repo, rangeid, globalindex):
localindex = globalindex - rangeid.index
cl = repo.changelog
result = []
bottom = rangeid._revs[:localindex]
top = stablerange(repo, rangeid.head, globalindex, rangeid._revs[localindex:])
#
toprootdepth = repo.stablerange.depthrev(repo, top._revs[0])
if toprootdepth + len(top) == rangeid.depth + 1:
bheads = [bottom[-1]]
else:
bheads = set(bottom)
parentrevs = cl.parentrevs
du = bheads.difference_update
for r in bottom:
du(parentrevs(r))
# if len(bheads) == 1:
# assert 1 == len(repo.revs('roots(%ld)', top._revs))
if len(bheads) == 1:
newhead = bottom[-1]
bottomdepth = repo.stablerange.depthrev(repo, newhead)
newstart = bottomdepth - len(bottom)
result.append(stablerange(repo, newhead, newstart, bottom))
else:
# assert 1 < len(bheads), (toprootdepth, len(top), len(rangeid))
cl = repo.changelog
for h in bheads:
subset = cl.ancestors([h], inclusive=True)
hrevs = [r for r in bottom if r in subset]
start = repo.stablerange.depthrev(repo, h) - len(hrevs)
entry = stablerange(repo, h, start, hrevs)
result.append(entry)
result.append(top)
return result
def _hlp2(i):
"""return highest power of two lower than 'i'"""
return 2 ** int(math.log(i - 1, 2))
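# A few sample values to make the behaviour concrete:
# _hlp2(2) == 1, _hlp2(8) == 4, _hlp2(9) == 8.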
class stablerange(object):
def __init__(self, repo, head, index, revs=None):
self._repo = repo.unfiltered()
self.head = head
self.index = index
if revs is not None:
assert len(revs) == len(self)
self._revs = revs
assert index < self.depth, (head, index, self.depth, revs)
def __repr__(self):
return '%s %d %d' % (nodemod.short(self.node), self.depth, self.index)
def __hash__(self):
return self._id
def __eq__(self, other):
if type(self) != type(other):
raise NotImplementedError()
return self.stablekey == other.stablekey
def __getitem__(self, idx):
"""small helper function to prepare for the migration to tuple"""
if idx == 0:
return self.head
elif idx == 1:
return self.index
else:
raise IndexError(idx)
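# so range[0] is the head revision and range[1] is the index, mirroring the
# (head, index) tuple this object is expected to become.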
@util.propertycache
def _id(self):
return hash(self.stablekey)
@util.propertycache
def stablekey(self):
return (self.head, self.index)
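# note: 'head' is a revision number here (see the commit message above);
# using the node would be more stable, but revision numbers are fine for
# this in-memory cache and match the future tuple format.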
@util.propertycache
def node(self):
return self._repo.changelog.node(self.head)
def __len__(self):
return self.depth - self.index
@util.propertycache
def depth(self):
return self._repo.stablerange.depthrev(self._repo, self.head)
@util.propertycache
def _revs(self):
r = stablesort(self._repo, [self.head])[self.index:]
assert len(r) == len(self), (self.head, self.index, len(r), len(self))
return r
def subranges(self):
return self._repo.stablerange.subranges(self._repo, self)
@eh.reposetup
def setupcache(ui, repo):
class stablerangerepo(repo.__class__):
@localrepo.unfilteredpropertycache
def stablerange(self):
return stablerangecache()
repo.__class__ = stablerangerepo