hgext3rd/evolve/stablerange.py
author Pierre-Yves David <pierre-yves.david@ens-lyon.org>
Wed, 22 Mar 2017 17:33:41 +0100
changeset 2147 5179668d9f47
parent 2142 7dc66a526b21
child 2148 b727c7de0560
permissions -rw-r--r--
stablerange: change the key to use the revision number

Using nodes is more stable, but for now we have no on-disk caching and revision numbers should be fine in memory. This makes the data used for the file cache closer to what it will be when we use tuples. We might reintroduce nodes in the future, but let us keep it simple for now.

# Code dedicated to the computation and properties of "stable ranges"
#
# These stable ranges are used for obsolescence marker discovery
#
# Copyright 2017 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import collections
import math

from mercurial import (
    commands,
    cmdutil,
    localrepo,
    node as nodemod,
    scmutil,
    util,
)

from mercurial.i18n import _

from . import (
    exthelper,
)

eh = exthelper.exthelper()

##################################
### Stable topological sorting ###
##################################
@eh.command(
    'debugstablesort',
    [
        ('', 'rev', [], 'heads to start from'),
    ] + commands.formatteropts,
    _(''))
def debugstablesort(ui, repo, **opts):
    """display the ::REVS set topologically sorted in a stable way
    """
    revs = scmutil.revrange(repo, opts['rev'])
    displayer = cmdutil.show_changeset(ui, repo, opts, buffered=True)
    for r in stablesort(repo, revs):
        ctx = repo[r]
        displayer.show(ctx)
        displayer.flush(ctx)
    displayer.close()

def stablesort(repo, revs):
    """return '::revs' topologically sorted in "stable" order

    This is a depth-first traversal starting from 'nullrev', using node as a
    tie breaker.
    """
    # Various notes:
    #
    # * Bitbucket uses dates as a tie breaker; that might be a good idea.
    #
    # * It seems we could traverse in the same order from (one) head to the
    #   bottom, if we recorded the following data for each merge:
    #
    #  - the highest (stablesort-wise) common ancestors,
    #  - the order of the parents (stablesort-wise)
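    # Illustrative example: for a small graph such as
    #
    #   3 (merge of 1 and 2)
    #   |\
    #   1 2
    #   |/
    #   0
    #
    # stablesort(repo, [3]) returns [0, x, y, 3] where 'x' and 'y' are 1 and
    # 2 ordered by their node, so the result only depends on the graph and
    # the node ids, not on the order the revisions entered the changelog.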
    cl = repo.changelog
    parents = cl.parentrevs
    nullrev = nodemod.nullrev
    n = cl.node
    # step 1: We need a parents -> children mapping for 2 reasons.
    #
    # * we build the order from nullrev to tip
    #
    # * we need to detect branching
    children = collections.defaultdict(list)
    for r in cl.ancestors(revs, inclusive=True):
        p1, p2 = parents(r)
        children[p1].append(r)
        if p2 != nullrev:
            children[p2].append(r)
    # step 2: walk back up from the roots
    # * pick the lowest node in case of branching
    # * stack the disregarded parts of the branching for later
    # * process a merge only once both its parents have been yielded

    # track which changesets have already been seen
    seen = [0] * (max(revs) + 2)
    seen[-1] = True # nullrev is known
    # starts from repository roots
    # reuse the list from the mapping as we won't need it again anyway
    stack = children[nullrev]
    if not stack:
        return []
    if 1 < len(stack):
        stack.sort(key=n, reverse=True)

    # list of revs; maybe we should yield instead, but since we built a full
    # children mapping we are already O(N) in memory anyway
    result = []

    current = stack.pop()
    while current is not None or stack:
        if current is None:
            # the previous iteration reached a dead end or an unready merge
            current = stack.pop()
            if seen[current]:
                current = None
                continue
        p1, p2 = parents(current)
        if not (seen[p1] and seen[p2]):
            # We cannot process this merge yet because its other parent has
            # not been yielded (and we are topo sorting). Discard it for now;
            # it will be reached again from that other parent.
            current = None
            continue
        assert not seen[current]
        seen[current] = True
        result.append(current) # could be yield, cf earlier comment
        cs = children[current]
        if not cs:
            current = None
        elif 1 == len(cs):
            current = cs[0]
        else:
            cs.sort(key=n, reverse=True)
            current = cs.pop() # proceed on smallest
            stack.extend(cs)   # stack the rest for later
    assert len(result) == len(set(result))
    return result

#################################
### Stable Range computation  ###
#################################

def subrangesclosure(repo, heads):
    """set of all standard subrange under heads

    This is intended for debug purposes. Range are returned from largest to
    smallest in terms of number of revision it contains."""
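    # For instance (illustrative), on a linear history of eight changesets
    # (revisions 0 to 7, head 7), the closure contains the ranges covering
    # 0-7, 0-3, 4-7, 0-1, 2-3, 4-5, 6-7 and every single revision.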
    toproceed = [stablerange(repo, r, 0) for r in heads]
    ranges = set(toproceed)
    while toproceed:
        entry = toproceed.pop()
        for r in entry.subranges():
            if r not in ranges:
                ranges.add(r)
                toproceed.append(r)
    ranges = list(ranges)
    n = repo.changelog.node
    ranges.sort(key=lambda r: (-len(r), n(r[0])))
    return ranges

class stablerangecache(dict):
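    """cache the depths and subranges computed for the stable ranges of a repo"""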

    def __init__(self):
        self._depthcache = {}
        self._subrangescache = {}

    def warmup(self, repo, heads):
        """warm the cache up to 'heads'"""
        for r in repo.revs("::%ld", heads):
            self.depthrev(repo, r)

    def depthrev(self, repo, rev):
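        """return the depth of 'rev': the number of revisions in '::rev'

        An explicit stack is used instead of recursion to stay clear of
        Python's recursion limit on long histories.
        """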
        repo = repo.unfiltered()
        cl = repo.changelog
        cache = self._depthcache
        nullrev = nodemod.nullrev
        stack = [rev]
        while stack:
            revdepth = None
            current = stack[-1]
            revdepth = cache.get(current)
            if revdepth is not None:
                stack.pop()
                continue
            p1, p2 = cl.parentrevs(current)
            if p1 == nullrev:
                # root case
                revdepth = 1
            elif p2 == nullrev:
                # linear commit case
                parentdepth = cache.get(p1)
                if parentdepth is None:
                    stack.append(p1)
                else:
                    revdepth = parentdepth + 1
            else:
                # merge case
                revdepth = self._depthmerge(cl, current, p1, p2, stack, cache)
            if revdepth is not None:
                cache[current] = revdepth
                stack.pop()
        # actual_depth = len(list(cl.ancestors([rev], inclusive=True)))
        # assert revdepth == actual_depth, (rev, revdepth, actual_depth)
        return revdepth

    @staticmethod
    def _depthmerge(cl, rev, p1, p2, stack, cache):
        # sub method to simplify the main 'depthrev' one
        revdepth = None
        depth_p1 = cache.get(p1)
        depth_p2 = cache.get(p2)
        missingparent = False
        if depth_p1 is None:
            stack.append(p1)
            missingparent = True
        if depth_p2 is None:
            stack.append(p2)
            missingparent = True
        if missingparent:
            return None
        # computing the depth of a merge
        # XXX the common ancestors heads could be cached
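        # The quantity we are after is |::rev|. The cases below are different
        # ways to derive it from depths that are already cached:
        #
        # * no common ancestor:      |::rev| = |::p1| + |::p2| + 1
        # * single common ancestor:  |::rev| = |::p1| + (|::p2| - |::anc|) + 1
        # * several common ancestors: fall back to
        #   |::rev| = |::anc| + |::rev - ::anc| for a well chosen 'anc', the
        #   second term requiring an actual walk (findmissingrevs).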
        ancnodes = cl.commonancestorsheads(cl.node(p1), cl.node(p2))
        ancrevs = [cl.rev(a) for a in ancnodes]
        anyunknown = False
        ancdepth = []
        for r in ancrevs:
            d = cache.get(r)
            if d is None:
                anyunknown = True
                stack.append(r)
            ancdepth.append((r, d))
        if anyunknown:
            return None
        if not ancrevs:
            # unrelated branches (no common root)
            revdepth = depth_p1 + depth_p2 + 1
        elif len(ancrevs) == 1:
            # one unique branch point:
            # we can compute depth without any walk
            depth_anc = ancdepth[0][1]
            revdepth = depth_p1 + (depth_p2 - depth_anc) + 1
        else:
            # multiple ancestors, we pick the one that is:
            # * the deepest (fewer changesets outside of it),
            # * the lowest rev, because it has more chance to have the
            #   descendants of the others "above" it
            anc, revdepth = max(ancdepth, key=lambda x: (x[1], -x[0]))
            revdepth += len(cl.findmissingrevs(common=[anc], heads=[rev]))
        return revdepth

    def rangelength(self, repo, rangeid):
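        """return the number of revisions contained in 'rangeid'"""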
        headrev, index = rangeid.head, rangeid.index
        return self.depthrev(repo, headrev) - index

    def subranges(self, repo, rangeid):
        cached = self._subrangescache.get(rangeid)
        if cached is not None:
            return cached
        if self.rangelength(repo, rangeid) == 1:
            value = []
        else:
            slicepoint = self._slicepoint(repo, rangeid)
            value = self._slicesrangeat(repo, rangeid, slicepoint)
        self._subrangescache[rangeid] = value
        return value

    def _slicepoint(self, repo, rangeid):
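        """return the global index at which 'rangeid' should be cut in two

        The cut is aligned on power-of-two boundaries ("standard" slicing
        points) whenever possible, so that the resulting subranges have a
        good chance of being shared with other ranges.
        """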
        rangedepth = self.depthrev(repo, rangeid.head)
        step = _hlp2(rangedepth)
        standard_start = 0
        while standard_start < rangeid.index and 0 < step:
            if standard_start + step < rangedepth:
                standard_start += step
            step //= 2
        if rangeid.index == standard_start:
            slicesize = _hlp2(len(rangeid))
            slicepoint = rangeid.index + slicesize
        else:
            assert standard_start < rangedepth
            slicepoint = standard_start
        return slicepoint

    def _slicesrangeat(self, repo, rangeid, globalindex):
        p1, p2 = repo.changelog.parentrevs(rangeid.head)
        if p2 != nodemod.nullrev:
            return self._slicesrangeatmerge(repo, rangeid, globalindex)
        assert p1 != nodemod.nullrev
        rangedepth = self.depthrev(repo, rangeid.head)
        topsize = rangedepth - globalindex

        parentrange = stablerange(repo, p1, rangeid.index, rangeid._revs[:-1])
        if topsize == 1:
            top = stablerange(repo, rangeid.head, globalindex, [rangeid.head])
            return [parentrange, top]
        else:
            # XXX recursive call; Python has issues with deep recursion.
            # The best place to break it would be directly in
            # 'self.subranges': there we could make sure the subranges for
            # (p1(rev), idx) are available before slicing (rev, idx). But the
            # heavy object approach makes that a bit inconvenient, so it will
            # wait for that heavy object to be gone.
            parentsubranges = self.subranges(repo, parentrange)
            slices = parentsubranges[:-1] # pop the top
            top = stablerange(repo, rangeid.head, globalindex, rangeid._revs[-topsize:])
            slices.append(top)
            return slices

    @staticmethod
    def _slicesrangeatmerge(repo, rangeid, globalindex):
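        """slice a range whose head is a merge at 'globalindex'

        The top part keeps the original head; the bottom part may need to be
        split further, one subrange per head it contains.
        """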
        localindex = globalindex - rangeid.index
        cl = repo.changelog

        result = []
        bottom = rangeid._revs[:localindex]
        top = stablerange(repo, rangeid.head, globalindex, rangeid._revs[localindex:])
        #
        toprootdepth = repo.stablerange.depthrev(repo, top._revs[0])
        if toprootdepth + len(top) == rangeid.depth + 1:
            bheads = [bottom[-1]]
        else:
            bheads = set(bottom)
            parentrevs = cl.parentrevs
            du = bheads.difference_update
            for r in bottom:
                du(parentrevs(r))
            # if len(bheads) == 1:
            #     assert 1 == len(repo.revs('roots(%ld)', top._revs))
        if len(bheads) == 1:
            newhead = bottom[-1]
            bottomdepth = repo.stablerange.depthrev(repo, newhead)
            newstart = bottomdepth - len(bottom)
            result.append(stablerange(repo, newhead, newstart, bottom))
        else:
            # assert 1 < len(bheads), (toprootdepth, len(top), len(rangeid))
            cl = repo.changelog
            for h in bheads:
                subset = cl.ancestors([h], inclusive=True)
                hrevs = [r for r in bottom if r in subset]
                start = repo.stablerange.depthrev(repo, h) - len(hrevs)
                entry = stablerange(repo, h, start, hrevs)
                result.append(entry)
        result.append(top)
        return result

def _hlp2(i):
    """return highest power of two lower than 'i'"""
    return 2 ** int(math.log(i - 1, 2))

class stablerange(object):
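    """a range of revisions defined by a head revision and an index

    The range covers 'stablesort(::head)[index:]', i.e. the last 'len(self)'
    revisions of '::head' in stable order. Two ranges with the same
    (head, index) pair are identical, which makes the pair usable as a
    cache key.
    """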

    def __init__(self, repo, head, index, revs=None):
        self._repo = repo.unfiltered()
        self.head = head
        self.index = index
        if revs is not None:
            assert len(revs) == len(self)
            self._revs = revs
        assert index < self.depth, (head, index, self.depth, revs)

    def __repr__(self):
        return '%s %d %d %s' % (nodemod.short(self.node), self.depth, self.index, nodemod.short(self.obshash))

    def __hash__(self):
        return self._id

    def __eq__(self, other):
        if type(self) != type(other):
            raise NotImplementedError()
        return self.stablekey == other.stablekey

    def __getitem__(self, idx):
        """small helper function to prepare for the migration to tuple"""
        if idx == 0:
            return self.head
        elif idx == 1:
            return self.index
        else:
            raise IndexError(idx)

    @util.propertycache
    def _id(self):
        return hash(self.stablekey)

    @util.propertycache
    def stablekey(self):
        return (self.head, self.index)

    @util.propertycache
    def node(self):
        return self._repo.changelog.node(self.head)

    def __len__(self):
        return self.depth - self.index

    @util.propertycache
    def depth(self):
        return self._repo.stablerange.depthrev(self._repo, self.head)

    @util.propertycache
    def _revs(self):
        r = stablesort(self._repo, [self.head])[self.index:]
        assert len(r) == len(self), (self.head, self.index, len(r), len(self))
        return r

    def subranges(self):
        return self._repo.stablerange.subranges(self._repo, self)

@eh.reposetup
def setupcache(ui, repo):
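    """install a 'stablerange' cache property on the repository"""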

    class stablerangerepo(repo.__class__):

        @localrepo.unfilteredpropertycache
        def stablerange(self):
            return stablerangecache()

    repo.__class__ = stablerangerepo