hgext3rd/evolve/stablerange.py
author Pierre-Yves David <pierre-yves.david@ens-lyon.org>
Thu, 23 Mar 2017 10:49:03 +0100
changeset 2206 84537469a094
parent 2205 bd5e2496e5cd
child 2207 f82a398162f5
permissions -rw-r--r--
slicesrangeat: stop double setting the revsinranges cache The cache should have already been filled by the logic warming the cache for the parent.

# Code dedicated to the computation and properties of "stable ranges"
#
# These stable ranges are use for obsolescence markers discovery
#
# Copyright 2017 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import collections
import math

from mercurial import (
    commands,
    cmdutil,
    localrepo,
    node as nodemod,
    scmutil,
)

from mercurial.i18n import _

from . import (
    exthelper,
)

eh = exthelper.exthelper()

##################################
### Stable topological sorting ###
##################################
@eh.command(
    'debugstablesort',
    [
        ('', 'rev', [], 'heads to start from'),
    ] + commands.formatteropts,
    _(''))
def debugstablesort(ui, repo, **opts):
    """display the ::REVS set topologically sorted in a stable way
    """
    revs = scmutil.revrange(repo, opts['rev'])
    displayer = cmdutil.show_changeset(ui, repo, opts, buffered=True)
    for r in stablesort(repo, revs):
        ctx = repo[r]
        displayer.show(ctx)
        displayer.flush(ctx)
    displayer.close()

def stablesort(repo, revs):
    """return '::revs' topologically sorted in "stable" order

    This is a depth first traversal starting from 'nullrev', using node as a
    tie breaker.
    """
    # Various notes:
    #
    # * Bitbucket is used dates as tie breaker, that might be a good idea.
    #
    # * It seemds we can traverse in the same order from (one) head to bottom,
    #   if we the following record data for each merge:
    #
    #  - highest (stablesort-wise) common ancestors,
    #  - order of parents (tablesort-wise)
    cl = repo.changelog
    parents = cl.parentrevs
    nullrev = nodemod.nullrev
    n = cl.node
    # step 1: We need a parents -> children mapping for 2 reasons.
    #
    # * we build the order from nullrev to tip
    #
    # * we need to detect branching
    children = collections.defaultdict(list)
    for r in cl.ancestors(revs, inclusive=True):
        p1, p2 = parents(r)
        children[p1].append(r)
        if p2 != nullrev:
            children[p2].append(r)
    # step two: walk back up
    # * pick lowest node in case of branching
    # * stack disregarded part of the branching
    # * process merge when both parents are yielded

    # track what changeset has been
    seen = [0] * (max(revs) + 2)
    seen[-1] = True # nullrev is known
    # starts from repository roots
    # reuse the list form the mapping as we won't need it again anyway
    stack = children[nullrev]
    if not stack:
        return []
    if 1 < len(stack):
        stack.sort(key=n, reverse=True)

    # list of rev, maybe we should yield, but since we built a children mapping we are 'O(N)' already
    result = []

    current = stack.pop()
    while current is not None or stack:
        if current is None:
            # previous iteration reached a merge or an unready merge,
            current = stack.pop()
            if seen[current]:
                current = None
                continue
        p1, p2 = parents(current)
        if not (seen[p1] and seen[p2]):
            # we can't iterate on this merge yet because other child is not
            # yielded yet (and we are topo sorting) we can discard it for now
            # because it will be reached from the other child.
            current = None
            continue
        assert not seen[current]
        seen[current] = True
        result.append(current) # could be yield, cf earlier comment
        cs = children[current]
        if not cs:
            current = None
        elif 1 == len(cs):
            current = cs[0]
        else:
            cs.sort(key=n, reverse=True)
            current = cs.pop() # proceed on smallest
            stack.extend(cs)   # stack the rest for later
    assert len(result) == len(set(result))
    return result

#################################
### Stable Range computation  ###
#################################

def _hlp2(i):
    """return highest power of two lower than 'i'"""
    return 2 ** int(math.log(i - 1, 2))

def subrangesclosure(repo, heads):
    """set of all standard subrange under heads

    This is intended for debug purposes. Range are returned from largest to
    smallest in terms of number of revision it contains."""
    subranges = repo.stablerange.subranges
    toproceed = [(r, 0, ) for r in heads]
    ranges = set(toproceed)
    while toproceed:
        entry = toproceed.pop()
        for r in subranges(repo, entry):
            if r not in ranges:
                ranges.add(r)
                toproceed.append(r)
    ranges = list(ranges)
    n = repo.changelog.node
    rangelength = repo.stablerange.rangelength
    ranges.sort(key=lambda r: (-rangelength(repo, r), n(r[0])))
    return ranges

class stablerange(object):

    def __init__(self):
        self._depthcache = {}
        self._subrangescache = {}

        # To slices merge, we need to walk their descendant in reverse stable
        # sort order. For now we perform a full stable sort their descendant
        # and then use the relevant top most part. This order is going to be
        # the same for all ranges headed at the same merge. So we cache these
        # value to reuse them accross the same invocation.
        self._stablesortcache = {}
        # if we already know all the revision that belong to a range, it is
        # quite trivial to have the subrange "inherit" that knowledge. This
        # cache is dedicated to hold the full list of revs inside a subrange
        # when we happens to know it.
        #
        # For example. if we are slicing a range headed by a merge so will have
        # the revision computed anyway, and passing that knowledge around might
        # help to slice one of its subranges also containings a merge.
        self._revsinrangecache = {}

    def warmup(self, repo, heads):
        """warm the cache up to 'heads'"""
        repo = repo.unfiltered()
        for r in repo.revs("::%ld", heads):
            self.depthrev(repo, r)

    def depthrev(self, repo, rev):
        repo = repo.unfiltered()
        cl = repo.changelog
        cache = self._depthcache
        nullrev = nodemod.nullrev
        stack = [rev]
        while stack:
            revdepth = None
            current = stack[-1]
            revdepth = cache.get(current)
            if revdepth is not None:
                stack.pop()
                continue
            p1, p2 = cl.parentrevs(current)
            if p1 == nullrev:
                # root case
                revdepth = 1
            elif p2 == nullrev:
                # linear commit case
                parentdepth = cache.get(p1)
                if parentdepth is None:
                    stack.append(p1)
                else:
                    revdepth = parentdepth + 1
            else:
                # merge case
                revdepth = self._depthmerge(cl, current, p1, p2, stack, cache)
            if revdepth is not None:
                cache[current] = revdepth
                stack.pop()
        # actual_depth = len(list(cl.ancestors([rev], inclusive=True)))
        # assert revdepth == actual_depth, (rev, revdepth, actual_depth)
        return revdepth

    def rangelength(self, repo, rangeid):
        headrev, index = rangeid[0], rangeid[1]
        return self.depthrev(repo, headrev) - index

    def subranges(self, repo, rangeid):
        cached = self._subrangescache.get(rangeid)
        if cached is not None:
            return cached
        value = self._subranges(repo, rangeid)
        self._subrangescache[rangeid] = value
        return value

    def revsfromrange(self, repo, rangeid):
        revs = self._revsinrangecache.get(rangeid)
        if revs is None:
            if self.rangelength(repo, rangeid) == 1:
                revs = [rangeid[0]]
            else:
                # get all revs under heads in stable order
                #
                # note: In the general case we can just walk down and then request
                # data about the merge. But I'm not sure this function will be even
                # call for the general case.
                allrevs = self._stablesortcache.get(rangeid[0])
                if allrevs is None:
                    allrevs = stablesort(repo, [rangeid[0]])
                    self._stablesortcache[rangeid[0]] = allrevs
                # takes from index
                revs = allrevs[rangeid[1]:]
            self._revsinrangecache[rangeid] = revs
        # sanity checks
        assert len(revs) == self.rangelength(repo, rangeid)
        return revs

    @staticmethod
    def _depthmerge(cl, rev, p1, p2, stack, cache):
        # sub method to simplify the main 'depthrev' one
        revdepth = None
        depth_p1 = cache.get(p1)
        depth_p2 = cache.get(p2)
        missingparent = False
        if depth_p1 is None:
            stack.append(p1)
            missingparent = True
        if depth_p2 is None:
            stack.append(p2)
            missingparent = True
        if missingparent:
            return None
        # computin depth of a merge
        # XXX the common ancestors heads could be cached
        ancnodes = cl.commonancestorsheads(cl.node(p1), cl.node(p2))
        ancrevs = [cl.rev(a) for a in ancnodes]
        anyunkown = False
        ancdepth = []
        for r in ancrevs:
            d = cache.get(r)
            if d is None:
                anyunkown = True
                stack.append(r)
            ancdepth.append((r, d))
        if anyunkown:
            return None
        if not ancrevs:
            # unrelated branch, (no common root)
            revdepth = depth_p1 + depth_p2 + 1
        elif len(ancrevs) == 1:
            # one unique branch point:
            # we can compute depth without any walk
            depth_anc = ancdepth[0][1]
            revdepth = depth_p1 + (depth_p2 - depth_anc) + 1
        else:
            # multiple ancestors, we pick one that is
            # * the deepest (less changeset outside of it),
            # * lowest revs because more chance to have descendant of other "above"
            anc, revdepth = max(ancdepth, key=lambda x: (x[1], -x[0]))
            revdepth += len(cl.findmissingrevs(common=[anc], heads=[rev]))
        return revdepth

    def _subranges(self, repo, rangeid):
        if self.rangelength(repo, rangeid) == 1:
            return []
        slicepoint = self._slicepoint(repo, rangeid)

        # make sure we have cache for all relevant parent first to prevent
        # recursion (python is bad with recursion
        stack = []
        current = rangeid
        while current is not None:
            current = self._unpreparedparentrange(repo, current, slicepoint)
            if current is not None:
                stack.append(current)
        while stack:
            # these call will directly compute the subranges
            self.subranges(repo, stack.pop())
        return self._slicesrangeat(repo, rangeid, slicepoint)

    def _unpreparedparentrange(self, repo, rangeid, slicepoint):
        """return parent range that it would be useful to prepare to slice
        rangeid at slicepoint

        This function also have the important task to update the revscache of
        the parent revs if possible and needed"""
        # is this is a merge, there is not need to prepare the parents.
        p1, p2 = repo.changelog.parentrevs(rangeid[0])
        if p2 != nodemod.nullrev:
            return None
        parentrange = (p1, rangeid[1])
        # if we have an entry for the current range, lets update the cache
        revscache = self._revsinrangecache
        if rangeid in revscache and parentrange not in revscache:
            parentrevs = self._revsinrangecache[rangeid][:-1]
            self._revsinrangecache[parentrange] = parentrevs
        # if we already have subrange for this range, no need to prepare it.
        if self._subrangescache.get(parentrange) is not None:
            return None
        # if we reached the slicepoint, no need to go further
        if self.depthrev(repo, parentrange[0]) == slicepoint:
            return None
        # look like we found a relevent parentrange with no cache yet
        return parentrange

    def _slicepoint(self, repo, rangeid):
        rangedepth = self.depthrev(repo, rangeid[0])
        step = _hlp2(rangedepth)
        standard_start = 0
        while standard_start < rangeid[1] and 0 < step:
            if standard_start + step < rangedepth:
                standard_start += step
            step //= 2
        if rangeid[1] == standard_start:
            slicesize = _hlp2(self.rangelength(repo, rangeid))
            slicepoint = rangeid[1] + slicesize
        else:
            assert standard_start < rangedepth
            slicepoint = standard_start
        return slicepoint

    def _slicesrangeat(self, repo, rangeid, globalindex):
        p1, p2 = repo.changelog.parentrevs(rangeid[0])
        if p2 != nodemod.nullrev:
            return self._slicesrangeatmerge(repo, rangeid, globalindex)
        assert p1 != nodemod.nullrev
        rangedepth = self.depthrev(repo, rangeid[0])
        topsize = rangedepth - globalindex

        parentrange = (p1, rangeid[1])
        if rangeid in self._revsinrangecache:
            # revs cache should have been filled by _unpreparedparentrange
            assert parentrange in self._revsinrangecache

        if topsize == 1:
            top = (rangeid[0], globalindex)
            return [parentrange, top]
        else:
            # This will not initiate a recursion since we took appropriate
            # precaution in the caller of this method to ensure it will be so.
            # It the parent is a merge that will not be the case but computing
            # subranges from a merge will not recurse.
            parentsubranges = self.subranges(repo, parentrange)
            slices = parentsubranges[:-1] # pop the top
            top = (rangeid[0], globalindex)
            # if we have an entry for the current range, lets update the cache
            if rangeid in self._revsinrangecache:
                parentrevs = self._revsinrangecache[rangeid][-topsize:]
                self._revsinrangecache[top] = parentrevs
            slices.append(top)
            return slices

    def _slicesrangeatmerge(self, repo, rangeid, globalindex):
        localindex = globalindex - rangeid[1]
        cl = repo.changelog

        result = []
        allrevs = self.revsfromrange(repo, rangeid)
        toprevs = allrevs[localindex:]
        bottomrevs = allrevs[:localindex]
        top = (rangeid[0], globalindex)
        self._revsinrangecache[top] = toprevs # update cache
        #
        rangedepth = self.depthrev(repo, rangeid[0])
        toprootdepth = self.depthrev(repo, toprevs[0])
        if toprootdepth + self.rangelength(repo, top) == rangedepth + 1:
            bheads = [bottomrevs[-1]]
        else:
            bheads = set(bottomrevs)
            parentrevs = cl.parentrevs
            du = bheads.difference_update
            for r in bottomrevs:
                du(parentrevs(r))
            # if len(bheads) == 1:
            #     assert 1 == len(repo.revs('roots(%ld)', top._revs))
        if len(bheads) == 1:
            newhead = bottomrevs[-1]
            bottomdepth = self.depthrev(repo, newhead)
            newstart = bottomdepth - len(bottomrevs)
            bottom = (newhead, newstart)
            self._revsinrangecache[bottom] = bottomrevs # update cache
            result.append(bottom)
        else:
            # assert 1 < len(bheads), (toprootdepth, len(top), len(rangeid))
            cl = repo.changelog
            for h in bheads:
                subset = cl.ancestors([h], inclusive=True)
                hrevs = [r for r in bottomrevs if r in subset]
                start = self.depthrev(repo, h) - len(hrevs)
                entry = (h, start)
                entryrevs = [r for r in bottomrevs if r in subset]
                self._revsinrangecache[entry] = entryrevs # update cache
                result.append(entry)
        result.append(top)
        return result

@eh.reposetup
def setupcache(ui, repo):

    class stablerangerepo(repo.__class__):

        @localrepo.unfilteredpropertycache
        def stablerange(self):
            return stablerange()

    repo.__class__ = stablerangerepo