stablerange: remove the now unused individual range class
That class is no longer necessary; we dropped its usage for performance reasons.
# Code dedicated to the computation and properties of "stable ranges"
#
# These stable ranges are used for obsolescence marker discovery
#
# Copyright 2017 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import collections
import math
from mercurial import (
commands,
cmdutil,
localrepo,
node as nodemod,
scmutil,
)
from mercurial.i18n import _
from . import (
exthelper,
)
eh = exthelper.exthelper()
##################################
### Stable topological sorting ###
##################################
@eh.command(
    'debugstablesort',
    [
        ('', 'rev', [], 'heads to start from'),
    ] + commands.formatteropts,
    _(''))
def debugstablesort(ui, repo, **opts):
    """display the ::REVS set topologically sorted in a stable way
    """
    # resolve the user-supplied revset, then render every changeset of
    # '::revs' in stable order through the standard changeset displayer
    revs = scmutil.revrange(repo, opts['rev'])
    displayer = cmdutil.show_changeset(ui, repo, opts, buffered=True)
    for rev in stablesort(repo, revs):
        changectx = repo[rev]
        displayer.show(changectx)
        displayer.flush(changectx)
    displayer.close()
def stablesort(repo, revs):
    """return '::revs' topologically sorted in "stable" order

    This is a depth first traversal starting from 'nullrev', using node as a
    tie breaker.

    note: assumes 'revs' is non-empty ('max(revs)' below would raise on an
    empty input) — callers filter that case out; verify before reusing.
    """
    # Various notes:
    #
    # * Bitbucket used dates as tie breaker, that might be a good idea.
    #
    # * It seems we can traverse in the same order from (one) head to bottom,
    #   if we record the following data for each merge:
    #
    #   - highest (stablesort-wise) common ancestors,
    #   - order of parents (stablesort-wise)
    cl = repo.changelog
    parents = cl.parentrevs
    nullrev = nodemod.nullrev
    n = cl.node
    # step 1: We need a parents -> children mapping for 2 reasons.
    #
    # * we build the order from nullrev to tip
    #
    # * we need to detect branching
    children = collections.defaultdict(list)
    for r in cl.ancestors(revs, inclusive=True):
        p1, p2 = parents(r)
        children[p1].append(r)
        if p2 != nullrev:
            children[p2].append(r)
    # step two: walk back up
    # * pick lowest node in case of branching
    # * stack disregarded part of the branching
    # * process merge when both parents are yielded
    # track which changesets have been emitted already
    # (seen[-1] aliases index 'max(revs) + 1', reserved for nullrev)
    seen = [0] * (max(revs) + 2)
    seen[-1] = True # nullrev is known
    # starts from repository roots
    # reuse the list from the mapping as we won't need it again anyway
    stack = children[nullrev]
    if not stack:
        return []
    if 1 < len(stack):
        stack.sort(key=n, reverse=True)
    # list of revs; maybe we should yield, but since we built a children
    # mapping we are 'O(N)' already
    result = []
    current = stack.pop()
    while current is not None or stack:
        if current is None:
            # previous iteration reached a merge or an unready merge,
            # resume from the pending branches
            current = stack.pop()
            if seen[current]:
                # merges can be stacked once per parent; skip duplicates
                current = None
                continue
        p1, p2 = parents(current)
        if not (seen[p1] and seen[p2]):
            # we can't iterate on this merge yet because other child is not
            # yielded yet (and we are topo sorting) we can discard it for now
            # because it will be reached from the other child.
            current = None
            continue
        assert not seen[current]
        seen[current] = True
        result.append(current) # could be yield, cf earlier comment
        cs = children[current]
        if not cs:
            # dead end: fall back to the stacked branches
            current = None
        elif 1 == len(cs):
            # linear segment: just follow it
            current = cs[0]
        else:
            cs.sort(key=n, reverse=True)
            current = cs.pop() # proceed on smallest
            stack.extend(cs) # stack the rest for later
    assert len(result) == len(set(result))
    return result
#################################
### Stable Range computation ###
#################################
def subrangesclosure(repo, heads):
    """set of all standard subranges under 'heads'

    Intended for debugging purposes. Ranges are returned from the largest to
    the smallest in terms of the number of revisions they contain."""
    subranges = repo.stablerange.subranges
    # worklist of ranges whose subranges still need to be expanded
    pending = [(h, 0) for h in heads]
    known = set(pending)
    while pending:
        rangeid = pending.pop()
        for sub in subranges(repo, rangeid):
            if sub not in known:
                known.add(sub)
                pending.append(sub)
    node = repo.changelog.node
    length = repo.stablerange.rangelength
    # biggest first, ties broken by the head node
    return sorted(known, key=lambda r: (-length(repo, r), node(r[0])))
class stablerangecache(dict):
    """cache for the computation of "stable range" properties

    A range is identified by a '(head, index)' pair: the range contains the
    revisions of the stable-sorted '::head' set from position 'index'
    onward.  This object memoizes the expensive pieces of that computation:
    revision depth, standard subranges, stable sort order and range
    membership.
    """

    def __init__(self):
        # rev -> depth (number of ancestors, inclusive); see 'depthrev'
        self._depthcache = {}
        # rangeid -> list of standard subranges; see 'subranges'
        self._subrangescache = {}
        # To slice merges, we need to walk their descendants in reverse
        # stable sort order. For now we perform a full stable sort of their
        # descendants and then use the relevant top most part. This order is
        # going to be the same for all ranges headed at the same merge. So we
        # cache these values to reuse them across the same invocation.
        self._stablesortcache = {}
        # if we already know all the revisions that belong to a range, it is
        # quite trivial to have the subrange "inherit" that knowledge. This
        # cache is dedicated to hold the full list of revs inside a subrange
        # when we happen to know it.
        #
        # For example, if we are slicing a range headed by a merge we will
        # have the revisions computed anyway, and passing that knowledge
        # around might help to slice one of its subranges also containing a
        # merge.
        self._revsinrangecache = {}

    def warmup(self, repo, heads):
        """warm the cache up to 'heads'"""
        # computing the depth walks every ancestor, filling the depth cache
        for r in repo.revs("::%ld", heads):
            self.depthrev(repo, r)

    def depthrev(self, repo, rev):
        """return the depth of 'rev': the number of its ancestors, inclusive

        Iterative (explicit stack) to avoid recursion limits on long
        histories; results are memoized in 'self._depthcache'.
        """
        repo = repo.unfiltered()
        cl = repo.changelog
        cache = self._depthcache
        nullrev = nodemod.nullrev
        stack = [rev]
        while stack:
            revdepth = None
            current = stack[-1]
            revdepth = cache.get(current)
            if revdepth is not None:
                # already computed (possibly scheduled twice); drop it
                stack.pop()
                continue
            p1, p2 = cl.parentrevs(current)
            if p1 == nullrev:
                # root case
                revdepth = 1
            elif p2 == nullrev:
                # linear commit case
                parentdepth = cache.get(p1)
                if parentdepth is None:
                    # parent depth unknown: compute it first, retry 'current'
                    stack.append(p1)
                else:
                    revdepth = parentdepth + 1
            else:
                # merge case
                revdepth = self._depthmerge(cl, current, p1, p2, stack, cache)
            if revdepth is not None:
                cache[current] = revdepth
                stack.pop()
        # actual_depth = len(list(cl.ancestors([rev], inclusive=True)))
        # assert revdepth == actual_depth, (rev, revdepth, actual_depth)
        return revdepth

    def rangelength(self, repo, rangeid):
        """return the number of revisions in the '(head, index)' range"""
        headrev, index = rangeid[0], rangeid[1]
        return self.depthrev(repo, headrev) - index

    def subranges(self, repo, rangeid):
        """return the list of standard subranges of 'rangeid' (cached)"""
        cached = self._subrangescache.get(rangeid)
        if cached is not None:
            return cached
        if self.rangelength(repo, rangeid) == 1:
            # a single-revision range cannot be sliced further
            value = []
        else:
            slicepoint = self._slicepoint(repo, rangeid)
            value = self._slicesrangeat(repo, rangeid, slicepoint)
        self._subrangescache[rangeid] = value
        return value

    def revsfromrange(self, repo, rangeid):
        """return the list of revisions contained in 'rangeid'"""
        revs = self._revsinrangecache.get(rangeid)
        if revs is None:
            if self.rangelength(repo, rangeid) == 1:
                revs = [rangeid[0]]
            else:
                # get all revs under heads in stable order
                #
                # note: In the general case we could just walk down and then
                # request data about the merge. But I'm not sure this
                # function will even be called in the general case.
                allrevs = self._stablesortcache.get(rangeid[0])
                if allrevs is None:
                    allrevs = stablesort(repo, [rangeid[0]])
                    self._stablesortcache[rangeid[0]] = allrevs
                # takes from index
                revs = allrevs[rangeid[1]:]
            self._revsinrangecache[rangeid] = revs
        # sanity checks
        assert len(revs) == self.rangelength(repo, rangeid)
        return revs

    @staticmethod
    def _depthmerge(cl, rev, p1, p2, stack, cache):
        # sub method to simplify the main 'depthrev' one
        #
        # Returns the depth of merge 'rev', or None when some needed
        # ancestor depth is still missing (after scheduling it on 'stack').
        revdepth = None
        depth_p1 = cache.get(p1)
        depth_p2 = cache.get(p2)
        missingparent = False
        if depth_p1 is None:
            stack.append(p1)
            missingparent = True
        if depth_p2 is None:
            stack.append(p2)
            missingparent = True
        if missingparent:
            return None
        # computing depth of a merge
        # XXX the common ancestors heads could be cached
        ancnodes = cl.commonancestorsheads(cl.node(p1), cl.node(p2))
        ancrevs = [cl.rev(a) for a in ancnodes]
        anyunkown = False
        ancdepth = []
        for r in ancrevs:
            d = cache.get(r)
            if d is None:
                # ancestor depth unknown yet; schedule it and retry later
                anyunkown = True
                stack.append(r)
            ancdepth.append((r, d))
        if anyunkown:
            return None
        if not ancrevs:
            # unrelated branch, (no common root)
            revdepth = depth_p1 + depth_p2 + 1
        elif len(ancrevs) == 1:
            # one unique branch point:
            # we can compute depth without any walk
            depth_anc = ancdepth[0][1]
            revdepth = depth_p1 + (depth_p2 - depth_anc) + 1
        else:
            # multiple ancestors, we pick one that is
            # * the deepest (less changesets outside of it),
            # * lowest revs because more chance to have descendants of the
            #   others "above" it
            anc, revdepth = max(ancdepth, key=lambda x: (x[1], -x[0]))
            revdepth += len(cl.findmissingrevs(common=[anc], heads=[rev]))
        return revdepth

    def _slicepoint(self, repo, rangeid):
        """return the global index at which 'rangeid' must be sliced

        Walks power-of-two steps down from the range depth to find the
        "standard" cut point for this '(head, index)' pair.
        """
        rangedepth = self.depthrev(repo, rangeid[0])
        step = _hlp2(rangedepth)
        standard_start = 0
        while standard_start < rangeid[1] and 0 < step:
            if standard_start + step < rangedepth:
                standard_start += step
            step //= 2
        if rangeid[1] == standard_start:
            # the range starts on a standard boundary: cut after the largest
            # power-of-two-sized bottom slice
            slicesize = _hlp2(self.rangelength(repo, rangeid))
            slicepoint = rangeid[1] + slicesize
        else:
            assert standard_start < rangedepth
            slicepoint = standard_start
        return slicepoint

    def _slicesrangeat(self, repo, rangeid, globalindex):
        """slice 'rangeid' (non-merge head) at 'globalindex'

        Returns the list of resulting subranges, bottom first.
        """
        p1, p2 = repo.changelog.parentrevs(rangeid[0])
        if p2 != nodemod.nullrev:
            # merge head: needs the dedicated (more expensive) logic
            return self._slicesrangeatmerge(repo, rangeid, globalindex)
        assert p1 != nodemod.nullrev
        rangedepth = self.depthrev(repo, rangeid[0])
        topsize = rangedepth - globalindex
        parentrange = (p1, rangeid[1])
        # if we have an entry for the current range, let's update the cache
        if rangeid in self._revsinrangecache:
            # drop the head itself: the parent range holds the rest
            parentrevs = self._revsinrangecache[rangeid][:-1]
            self._revsinrangecache[parentrange] = parentrevs
        if topsize == 1:
            top = (rangeid[0], globalindex)
            return [parentrange, top]
        else:
            # XXX recursive call, python has issues with them
            # The best way to break it would be directly 'self.subranges'
            # In that other method we could make sure subranges for
            # (p1(rev), idx) are available before slicing (rev, idx). But the
            # heavy object approach makes it a bit inconvenient so that will
            # wait for that heavy object to be gone.
            parentsubranges = self.subranges(repo, parentrange)
            slices = parentsubranges[:-1] # pop the top
            top = (rangeid[0], globalindex)
            # if we have an entry for the current range, let's update the cache
            if rangeid in self._revsinrangecache:
                # NOTE(review): 'parentrevs' is misnamed here — this slice
                # holds the revs of the new *top* range, which is what gets
                # cached; behavior is correct, only the name misleads.
                parentrevs = self._revsinrangecache[rangeid][-topsize:]
                self._revsinrangecache[top] = parentrevs
            slices.append(top)
            return slices

    def _slicesrangeatmerge(self, repo, rangeid, globalindex):
        """slice 'rangeid' (merge head) at 'globalindex'

        The bottom part may have multiple heads, producing one subrange per
        bottom head; returns the subranges, bottom ones first, top last.
        """
        localindex = globalindex - rangeid[1]
        cl = repo.changelog
        result = []
        allrevs = self.revsfromrange(repo, rangeid)
        toprevs = allrevs[localindex:]
        bottomrevs = allrevs[:localindex]
        top = (rangeid[0], globalindex)
        self._revsinrangecache[top] = toprevs # update cache
        #
        rangedepth = self.depthrev(repo, rangeid[0])
        toprootdepth = self.depthrev(repo, toprevs[0])
        if toprootdepth + self.rangelength(repo, top) == rangedepth + 1:
            # the top is a linear tail of the range: the bottom necessarily
            # has a single head, its last revision in stable order
            bheads = [bottomrevs[-1]]
        else:
            # compute the bottom heads: bottom revs that are nobody's parent
            bheads = set(bottomrevs)
            parentrevs = cl.parentrevs
            du = bheads.difference_update
            for r in bottomrevs:
                du(parentrevs(r))
            # if len(bheads) == 1:
            #     assert 1 == len(repo.revs('roots(%ld)', top._revs))
        if len(bheads) == 1:
            newhead = bottomrevs[-1]
            bottomdepth = self.depthrev(repo, newhead)
            newstart = bottomdepth - len(bottomrevs)
            bottom = (newhead, newstart)
            self._revsinrangecache[bottom] = bottomrevs # update cache
            result.append(bottom)
        else:
            # assert 1 < len(bheads), (toprootdepth, len(top), len(rangeid))
            cl = repo.changelog
            for h in bheads:
                subset = cl.ancestors([h], inclusive=True)
                hrevs = [r for r in bottomrevs if r in subset]
                start = self.depthrev(repo, h) - len(hrevs)
                entry = (h, start)
                # NOTE(review): 'entryrevs' recomputes the same list as
                # 'hrevs' above; harmless duplication, kept as-is
                entryrevs = [r for r in bottomrevs if r in subset]
                self._revsinrangecache[entry] = entryrevs # update cache
                result.append(entry)
        result.append(top)
        return result
def _hlp2(i):
"""return highest power of two lower than 'i'"""
return 2 ** int(math.log(i - 1, 2))
@eh.reposetup
def setupcache(ui, repo):
    """install a lazily-built 'stablerange' cache property on 'repo'"""
    base = repo.__class__

    class stablerangerepo(base):
        @localrepo.unfilteredpropertycache
        def stablerange(self):
            # built on first access, shared by all filtered views
            return stablerangecache()

    repo.__class__ = stablerangerepo