topicmap: massive rework
authorPierre-Yves David <pierre-yves.david@octobus.net>
Thu, 22 Jun 2017 10:13:29 +0200
changeset 2653 13313d0cab71
parent 2652 839c2879edcc
child 2654 320f4faef18c
topicmap: massive rework Massively rework the way we build and use topicmap. This bring massive performance benefit. Topic map use to be a fully independant thing that we would switch on and off globaly. The caching on disk was broken so the performance were atrocious. Intead, now the topic are inherited from the 'immutable' map. We gave up on storing them on disk for now since the mutable set is usually small enough. The activation is done by hacking new "filter" on the repository and detection when they are one. This is hacky but core is hard to wrap here. Overall this whole wrapping is really scary and we should massage core API to help it.
README
hgext3rd/topic/__init__.py
hgext3rd/topic/destination.py
hgext3rd/topic/discovery.py
hgext3rd/topic/topicmap.py
--- a/README	Thu Jun 22 09:47:14 2017 +0200
+++ b/README	Thu Jun 22 10:13:29 2017 +0200
@@ -131,6 +131,7 @@
  - stack: properly abort when and unknown topic is requested,
  - topic: changing topic on revs no longer adds extra instability (issue5441)
  - topic: topics: rename '--change' flag to '--rev' flag,
+ - topic: multiple large performance improvements,
 
 6.4.1 - in progress
 -------------------
--- a/hgext3rd/topic/__init__.py	Thu Jun 22 09:47:14 2017 +0200
+++ b/hgext3rd/topic/__init__.py	Thu Jun 22 10:13:29 2017 +0200
@@ -56,7 +56,6 @@
 
 from mercurial.i18n import _
 from mercurial import (
-    branchmap,
     cmdutil,
     commands,
     context,
@@ -169,6 +168,8 @@
     if not isinstance(repo, localrepo.localrepository):
         return # this can be a peer in the ssh case (puzzling)
 
+    repo = repo.unfiltered()
+
     if repo.ui.config('experimental', 'thg.displaynames', None) is None:
         repo.ui.setconfig('experimental', 'thg.displaynames', 'topics',
                           source='topic-extension')
@@ -191,6 +192,11 @@
                 self.ui.restoreconfig(backup)
 
         def commitctx(self, ctx, error=None):
+            topicfilter = topicmap.topicfilter(self.filtername)
+            if topicfilter != self.filtername:
+                other = repo.filtered(topicmap.topicfilter(repo.filtername))
+                other.commitctx(ctx, error=error)
+
             if isinstance(ctx, context.workingcommitctx):
                 current = self.currenttopic
                 if current:
@@ -201,8 +207,7 @@
                 not self.currenttopic):
                 # we are amending and need to remove a topic
                 del ctx.extra()[constants.extrakey]
-            with topicmap.usetopicmap(self):
-                return super(topicrepo, self).commitctx(ctx, error=error)
+            return super(topicrepo, self).commitctx(ctx, error=error)
 
         @property
         def topics(self):
@@ -219,23 +224,21 @@
         def currenttopic(self):
             return self.vfs.tryread('topic')
 
-        def branchmap(self, topic=True):
-            if not topic:
-                super(topicrepo, self).branchmap()
-            with topicmap.usetopicmap(self):
-                branchmap.updatecache(self)
-            return self._topiccaches[self.filtername]
+        # overwritten at the instance level by topicmap.py
+        _autobranchmaptopic = True
 
-        def destroyed(self, *args, **kwargs):
-            with topicmap.usetopicmap(self):
-                return super(topicrepo, self).destroyed(*args, **kwargs)
+        def branchmap(self, topic=None):
+            if topic is None:
+                topic = self._autobranchmaptopic
+            topicfilter = topicmap.topicfilter(self.filtername)
+            if not topic or topicfilter == self.filtername:
+                return super(topicrepo, self).branchmap()
+            return self.filtered(topicfilter).branchmap()
 
         def invalidatevolatilesets(self):
             # XXX we might be able to move this to something invalidated less often
             super(topicrepo, self).invalidatevolatilesets()
             self._topics = None
-            if '_topiccaches' in vars(self.unfiltered()):
-                self.unfiltered()._topiccaches.clear()
 
         def peer(self):
             peer = super(topicrepo, self).peer()
--- a/hgext3rd/topic/destination.py	Thu Jun 22 09:47:14 2017 +0200
+++ b/hgext3rd/topic/destination.py	Thu Jun 22 10:13:29 2017 +0200
@@ -89,14 +89,14 @@
     # but that is expensive
     #
     # we should write plain code instead
-    with topicmap.usetopicmap(repo):
-        tmap = repo.branchmap()
-        if branch not in tmap:
-            return []
-        elif all:
-            return tmap.branchheads(branch)
-        else:
-            return [tmap.branchtip(branch)]
+
+    tmap = topicmap.gettopicrepo(repo).branchmap()
+    if branch not in tmap:
+        return []
+    elif all:
+        return tmap.branchheads(branch)
+    else:
+        return [tmap.branchtip(branch)]
 
 def modsetup(ui):
     """run a uisetup time to install all destinations wrapping"""
--- a/hgext3rd/topic/discovery.py	Thu Jun 22 09:47:14 2017 +0200
+++ b/hgext3rd/topic/discovery.py	Thu Jun 22 10:13:29 2017 +0200
@@ -4,7 +4,6 @@
 
 from mercurial.i18n import _
 from mercurial import (
-    branchmap,
     bundle2,
     discovery,
     error,
@@ -13,15 +12,15 @@
     wireproto,
 )
 
-from . import topicmap
-
 def _headssummary(orig, *args):
     # In mercurial < 4.2, we receive repo, remote and outgoing as arguments
     if len(args) == 3:
+        pushoparg = False
         repo, remote, outgoing = args
 
     # In mercurial > 4.3, we receive the pushop as arguments
     elif len(args) == 1:
+        pushoparg = True
         pushop = args[0]
         repo = pushop.repo.unfiltered()
         remote = pushop.remote
@@ -33,38 +32,44 @@
                   or bool(remote.listkeys('phases').get('publishing', False)))
     if publishing or not remote.capable('topics'):
         return orig(*args)
-    oldrepo = repo.__class__
-    oldbranchcache = branchmap.branchcache
-    oldfilename = branchmap._filename
-    try:
-        class repocls(repo.__class__):
-            def __getitem__(self, key):
-                ctx = super(repocls, self).__getitem__(key)
-                oldbranch = ctx.branch
+
+    class repocls(repo.__class__):
+        def __getitem__(self, key):
+            ctx = super(repocls, self).__getitem__(key)
+            oldbranch = ctx.branch
+
+            def branch():
+                branch = oldbranch()
+                topic = ctx.topic()
+                if topic:
+                    branch = "%s:%s" % (branch, topic)
+                return branch
 
-                def branch():
-                    branch = oldbranch()
-                    topic = ctx.topic()
-                    if topic:
-                        branch = "%s:%s" % (branch, topic)
-                    return branch
+            ctx.branch = branch
+            return ctx
 
-                ctx.branch = branch
-                return ctx
-
+    oldrepo = repo.__class__
+    try:
         repo.__class__ = repocls
-        branchmap.branchcache = topicmap.topiccache
-        branchmap._filename = topicmap._filename
-        summary = orig(*args)
+        unxx = repo.filtered('unfiltered-topic')
+        repo.unfiltered = lambda: unxx
+        if pushoparg:
+            try:
+                pushop.repo = repo
+                summary = orig(pushop)
+            finally:
+                pushop.repo = repo
+        else:
+            summary = orig(repo, remote, outgoing)
         for key, value in summary.iteritems():
             if ':' in key: # This is a topic
                 if value[0] is None and value[1]:
                     summary[key] = ([value[1].pop(0)], ) + value[1:]
         return summary
     finally:
+        if 'unfiltered' in vars(repo):
+            del repo.unfiltered
         repo.__class__ = oldrepo
-        branchmap.branchcache = oldbranchcache
-        branchmap._filename = oldfilename
 
 def wireprotobranchmap(orig, repo, proto):
     oldrepo = repo.__class__
--- a/hgext3rd/topic/topicmap.py	Thu Jun 22 09:47:14 2017 +0200
+++ b/hgext3rd/topic/topicmap.py	Thu Jun 22 10:13:29 2017 +0200
@@ -1,27 +1,63 @@
 import contextlib
 import hashlib
 
-from mercurial.node import hex, bin, nullid
+from mercurial.node import nullid
 from mercurial import (
     branchmap,
     changegroup,
     cmdutil,
-    encoding,
-    error,
     extensions,
-    scmutil,
+    repoview,
 )
 
-def _filename(repo):
-    """name of a branchcache file for a given repo or repoview"""
-    filename = "cache/topicmap"
-    if repo.filtername:
-        filename = '%s-%s' % (filename, repo.filtername)
-    return filename
+basefilter = set(['base', 'immutable'])
+def topicfilter(name):
+    """return a "topic" version of a filter level"""
+    if name in basefilter:
+        return name
+    elif name is None:
+        return None
+    elif name.endswith('-topic'):
+        return name
+    else:
+        return name + '-topic'
+
+def istopicfilter(filtername):
+    if filtername is None:
+        return False
+    return filtername.endswith('-topic')
+
+def gettopicrepo(repo):
+    filtername = topicfilter(repo.filtername)
+    if filtername == repo.filtername:
+        return repo
+    return repo.filtered(filtername)
 
-oldbranchcache = branchmap.branchcache
+def _setuptopicfilter(ui):
+    """extend the filter related mapping with topic related one"""
+    funcmap = repoview.filtertable
+    partialmap = branchmap.subsettable
+
+    # filter level not affected by topic that we should not override
+
+    for plainname in list(funcmap):
+        newfilter = topicfilter(plainname)
+        if newfilter == plainname:
+            continue
+
+        def revsfunc(repo, name=plainname):
+            return repoview.filterrevs(repo, name)
+
+        base = topicfilter(partialmap[plainname])
+
+        if newfilter not in funcmap:
+            funcmap[newfilter] = revsfunc
+            partialmap[newfilter] = base
+    funcmap['unfiltered-topic'] = lambda repo: frozenset()
+    partialmap['unfiltered-topic'] = 'visible-topic'
 
 def _phaseshash(repo, maxrev):
+    """uniq ID for a phase matching a set of rev"""
     revs = set()
     cl = repo.changelog
     fr = cl.filteredrevs
@@ -40,61 +76,65 @@
         key = s.digest()
     return key
 
-@contextlib.contextmanager
-def usetopicmap(repo):
-    """use awful monkey patching to ensure topic map usage
+def modsetup(ui):
+    """call at uisetup time to install various wrappings"""
+    _setuptopicfilter(ui)
+    _wrapbmcache(ui)
+    extensions.wrapfunction(changegroup.cg1unpacker, 'apply', cgapply)
+    extensions.wrapfunction(cmdutil, 'commitstatus', commitstatus)
 
-    During the extend of the context block, The topicmap should be used and
-    updated instead of the branchmap."""
-    oldbranchcache = branchmap.branchcache
-    oldfilename = branchmap._filename
-    oldread = branchmap.read
-    oldcaches = getattr(repo, '_branchcaches', {})
-    try:
-        branchmap.branchcache = topiccache
-        branchmap._filename = _filename
-        branchmap.read = readtopicmap
-        repo._branchcaches = getattr(repo, '_topiccaches', {})
-        yield
-        repo._topiccaches = repo._branchcaches
-    finally:
-        repo._branchcaches = oldcaches
-        branchmap.branchcache = oldbranchcache
-        branchmap._filename = oldfilename
-        branchmap.read = oldread
-
-def cgapply(orig, repo, *args, **kwargs):
+def cgapply(orig, self, repo, *args, **kwargs):
     """make sure a topicmap is used when applying a changegroup"""
-    with usetopicmap(repo):
-        return orig(repo, *args, **kwargs)
+    other = repo.filtered(topicfilter(repo.filtername))
+    return orig(self, other, *args, **kwargs)
 
 def commitstatus(orig, repo, node, branch, bheads=None, opts=None):
     # wrap commit status use the topic branch heads
     ctx = repo[node]
     if ctx.topic() and ctx.branch() == branch:
-        bheads = repo.branchheads("%s:%s" % (branch, ctx.topic()))
+        subbranch = "%s:%s" % (branch, ctx.topic())
+        bheads = repo.branchheads("%s:%s" % (subbranch, ctx.topic()))
     return orig(repo, node, branch, bheads=bheads, opts=opts)
 
-class topiccache(oldbranchcache):
+def _wrapbmcache(ui):
+    class topiccache(_topiccache, branchmap.branchcache):
+        pass
+    branchmap.branchcache = topiccache
+    extensions.wrapfunction(branchmap, 'updatecache', _wrapupdatebmcache)
+
+def _wrapupdatebmcache(orig, repo):
+    previous = repo._autobranchmaptopic
+    try:
+        repo._autobranchmaptopic = False
+        return orig(repo)
+    finally:
+        repo._autobranchmaptopic = previous
+
+# needed to prevent reference used for 'super()' call using in branchmap.py to
+# no go into cycle. (yes, URG)
+_oldbranchmap = branchmap.branchcache
+
+@contextlib.contextmanager
+def oldbranchmap():
+    previous = branchmap.branchcache
+    try:
+        branchmap.branchcache = _oldbranchmap
+        yield
+    finally:
+        branchmap.branchcache = previous
+
+class _topiccache(object): # combine me with branchmap.branchcache
 
     def __init__(self, *args, **kwargs):
-        otherbranchcache = branchmap.branchcache
-        try:
-            # super() call may fail otherwise
-            branchmap.branchcache = oldbranchcache
-            super(topiccache, self).__init__(*args, **kwargs)
-            if self.filteredhash is None:
-                self.filteredhash = nullid
-            self.phaseshash = nullid
-        finally:
-            branchmap.branchcache = otherbranchcache
+        # super() call may fail otherwise
+        with oldbranchmap():
+            super(_topiccache, self).__init__(*args, **kwargs)
+        self.phaseshash = None
 
     def copy(self):
         """return an deep copy of the branchcache object"""
-        new = topiccache(self, self.tipnode, self.tiprev, self.filteredhash,
-                         self._closednodes)
-        if self.filteredhash is None:
-            self.filteredhash = nullid
+        new = self.__class__(self, self.tipnode, self.tiprev, self.filteredhash,
+                             self._closednodes)
         new.phaseshash = self.phaseshash
         return new
 
@@ -104,144 +144,61 @@
         Raise KeyError for unknown branch.'''
         if topic:
             branch = '%s:%s' % (branch, topic)
-        return super(topiccache, self).branchtip(branch)
+        return super(_topiccache, self).branchtip(branch)
 
     def branchheads(self, branch, closed=False, topic=''):
         if topic:
             branch = '%s:%s' % (branch, topic)
-        return super(topiccache, self).branchheads(branch, closed=closed)
+        return super(_topiccache, self).branchheads(branch, closed=closed)
 
     def validfor(self, repo):
         """Is the cache content valid regarding a repo
 
         - False when cached tipnode is unknown or if we detect a strip.
         - True when cache is up to date or a subset of current repo."""
-        # This is copy paste of mercurial.branchmap.branchcache.validfor in
-        # 69077c65919d With a small changes to the cache key handling to
-        # include phase information that impact the topic cache.
-        #
-        # All code changes should be flagged on site.
-        try:
-            if (self.tipnode == repo.changelog.node(self.tiprev)):
-                fh = scmutil.filteredhash(repo, self.tiprev)
-                if fh is None:
-                    fh = nullid
-                if ((self.filteredhash == fh)
-                    and (self.phaseshash == _phaseshash(repo, self.tiprev))):
-                    return True
+        valid = super(_topiccache, self).validfor(repo)
+        if not valid:
             return False
-        except IndexError:
-            return False
+        elif not istopicfilter(repo.filtername) or self.phaseshash is None:
+            # phasehash at None means this is a branchmap
+            # come from non topic thing
+            return True
+        else:
+            try:
+                valid = self.phaseshash == _phaseshash(repo, self.tiprev)
+                return valid
+            except IndexError:
+                return False
 
     def write(self, repo):
-        # This is copy paste of mercurial.branchmap.branchcache.write in
-        # 69077c65919d With a small changes to the cache key handling to
-        # include phase information that impact the topic cache.
-        #
-        # All code changes should be flagged on site.
-        try:
-            f = repo.vfs(_filename(repo), "w", atomictemp=True)
-            cachekey = [hex(self.tipnode), str(self.tiprev)]
-            # [CHANGE] we need a hash in all cases
-            assert self.filteredhash is not None
-            cachekey.append(hex(self.filteredhash))
-            cachekey.append(hex(self.phaseshash))
-            f.write(" ".join(cachekey) + '\n')
-            nodecount = 0
-            for label, nodes in sorted(self.iteritems()):
-                for node in nodes:
-                    nodecount += 1
-                    if node in self._closednodes:
-                        state = 'c'
-                    else:
-                        state = 'o'
-                    f.write("%s %s %s\n" % (hex(node), state,
-                                            encoding.fromlocal(label)))
-            f.close()
-            repo.ui.log('branchcache',
-                        'wrote %s branch cache with %d labels and %d nodes\n',
-                        repo.filtername, len(self), nodecount)
-        except (IOError, OSError, error.Abort) as inst:
-            repo.ui.debug("couldn't write branch cache: %s\n" % inst)
-            # Abort may be raise by read only opener
-            pass
+        # we expect mutable set to be small enough to be that computing it all
+        # the time will be fast enough
+        if not istopicfilter(repo.filtername):
+            super(_topiccache, self).write(repo)
 
     def update(self, repo, revgen):
         """Given a branchhead cache, self, that may have extra nodes or be
         missing heads, and a generator of nodes that are strictly a superset of
         heads missing, this function updates self to be correct.
         """
-        oldgetbranchinfo = repo.revbranchcache().branchinfo
+        if not istopicfilter(repo.filtername):
+            return super(_topiccache, self).update(repo, revgen)
+        unfi = repo.unfiltered()
+        oldgetbranchinfo = unfi.revbranchcache().branchinfo
+
+        def branchinfo(r):
+            info = oldgetbranchinfo(r)
+            topic = ''
+            ctx = unfi[r]
+            if ctx.mutable():
+                topic = ctx.topic()
+            branch = info[0]
+            if topic:
+                branch = '%s:%s' % (branch, topic)
+            return (branch, info[1])
         try:
-            def branchinfo(r):
-                info = oldgetbranchinfo(r)
-                topic = ''
-                ctx = repo[r]
-                if ctx.mutable():
-                    topic = ctx.topic()
-                branch = info[0]
-                if topic:
-                    branch = '%s:%s' % (branch, topic)
-                return (branch, info[1])
-            repo.revbranchcache().branchinfo = branchinfo
-            super(topiccache, self).update(repo, revgen)
-            if self.filteredhash is None:
-                self.filteredhash = nullid
+            unfi.revbranchcache().branchinfo = branchinfo
+            super(_topiccache, self).update(repo, revgen)
             self.phaseshash = _phaseshash(repo, self.tiprev)
         finally:
-            repo.revbranchcache().branchinfo = oldgetbranchinfo
-
-def readtopicmap(repo):
-    # This is copy paste of mercurial.branchmap.read in 69077c65919d
-    # With a small changes to the cache key handling to include phase
-    # information that impact the topic cache.
-    #
-    # All code changes should be flagged on site.
-    try:
-        f = repo.vfs(_filename(repo))
-        lines = f.read().split('\n')
-        f.close()
-    except (IOError, OSError):
-        return None
-
-    try:
-        cachekey = lines.pop(0).split(" ", 2)
-        last, lrev = cachekey[:2]
-        last, lrev = bin(last), int(lrev)
-        filteredhash = bin(cachekey[2]) # [CHANGE] unconditional filteredhash
-        partial = topiccache(tipnode=last, tiprev=lrev,
-                             filteredhash=filteredhash)
-        partial.phaseshash = bin(cachekey[3]) # [CHANGE] read phaseshash
-        if not partial.validfor(repo):
-            # invalidate the cache
-            raise ValueError('tip differs')
-        cl = repo.changelog
-        for l in lines:
-            if not l:
-                continue
-            node, state, label = l.split(" ", 2)
-            if state not in 'oc':
-                raise ValueError('invalid branch state')
-            label = encoding.tolocal(label.strip())
-            node = bin(node)
-            if not cl.hasnode(node):
-                raise ValueError('node %s does not exist' % hex(node))
-            partial.setdefault(label, []).append(node)
-            if state == 'c':
-                partial._closednodes.add(node)
-    except KeyboardInterrupt:
-        raise
-    except Exception as inst:
-        if repo.ui.debugflag:
-            msg = 'invalid branchheads cache'
-            if repo.filtername is not None:
-                msg += ' (%s)' % repo.filtername
-            msg += ': %s\n'
-            repo.ui.debug(msg % inst)
-        partial = None
-    return partial
-
-def modsetup(ui):
-    """call at uisetup time to install various wrappings"""
-    extensions.wrapfunction(changegroup.cg1unpacker, 'apply', cgapply)
-    extensions.wrapfunction(cmdutil, 'commitstatus', commitstatus)
+            unfi.revbranchcache().branchinfo = oldgetbranchinfo