--- a/hgext3rd/evolve/obscache.py Thu May 04 21:21:59 2017 +0200
+++ b/hgext3rd/evolve/obscache.py Thu May 04 21:24:02 2017 +0200
@@ -13,6 +13,7 @@
import errno
from mercurial import (
+ error,
localrepo,
obsolete,
phases,
@@ -20,6 +21,8 @@
util,
)
+from mercurial.i18n import _
+
from . import (
exthelper,
)
@@ -160,6 +163,45 @@
return True, startrev, startidx
+
+# XXX copied as-is from Mercurial 4.2, with an extra "offset" parameter added
+@util.nogc
+def _readmarkers(data, offset=None):
+ """Read and enumerate markers from raw data"""
+ off = 0
+ diskversion = struct.unpack('>B', data[off:off + 1])[0]
+ if offset is None:
+ off += 1
+ else:
+ assert 1 <= offset
+ off = offset
+ if diskversion not in obsolete.formats:
+ raise error.Abort(_('parsing obsolete marker: unknown version %r')
+ % diskversion)
+ return diskversion, obsolete.formats[diskversion][0](data, off)
+
+def markersfrom(obsstore, byteoffset, firstmarker):
+    """return all markers from the obsstore, starting at 'firstmarker'
+
+    'byteoffset' must be the offset of 'firstmarker' in the raw obsstore
+    file, so that the already-known markers can be skipped without being
+    parsed.
+    """
+    if '_all' in vars(obsstore):
+        # if the data are already in memory, just use them
+        return obsstore._all[firstmarker:]
+    else:
+        obsdata = obsstore.svfs.tryread('obsstore')
+        return _readmarkers(obsdata, byteoffset)[1]
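+
+# Hypothetical sanity check (illustration only, nothing calls it): the
+# offset-based fast path above should always return the same markers as
+# slicing the fully parsed list, whatever obsstore format is in use.
+def _checkmarkersfrom(obsstore, byteoffset, firstmarker):
+    """compare 'markersfrom' against a full parse (debugging aid)"""
+    fast = list(markersfrom(obsstore, byteoffset, firstmarker))
+    slow = list(obsstore._all)[firstmarker:]
+    assert fast == slow, (len(fast), len(slow))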
+
class obscache(object):
"""cache the "does a rev" is the precursors of some obsmarkers data
@@ -239,66 +281,92 @@
if startrev is None and startidx is None:
return
+        # check that we never run 'update' without holding a lock
+        #
+        # There is a potential race condition otherwise, since the repo
+        # "might" have changed since the 'upgradeneeded' check above. But
+        # this code only runs while holding a lock, so we ignore it for now.
+        #
+        # To lift this limitation, 'upgradeneeded' should return a bounded
+        # amount of changesets and markers to read, along with their
+        # associated cachekey. See 'upgradeneeded' for details.
+ assert repo._currentlock(repo._lockref) is not None
+
# process the new changesets
cl = repo.changelog
if startrev is not None:
- node = cl.node
- # Note:
- #
- # Newly added changeset might be affected by obsolescence markers
- # we already have locally. So we needs to have soem global
- # knowledge about the markers to handle that question. Right this
- # requires parsing all markers in the obsstore. However, we could
- # imagine using various optimisation (eg: bloom filter, other on
- # disk cache) to remove this full parsing.
- #
- # For now we stick to the simpler approach or paying the
- # performance cost on new changesets.
- succs = repo.obsstore.successors
- for r in cl.revs(startrev):
- if node(r) in succs:
- val = 1
- else:
- val = 0
- self._data.append(val)
+ self._updaterevs(repo, cl.revs(startrev))
assert len(self._data) == len(cl), (len(self._data), len(cl))
# process the new obsmarkers
if startidx is not None:
- rev = cl.nodemap.get
- markers = repo.obsstore._all
- # Note:
- #
- # There are no actually needs to load the full obsstore here,
- # since we only read the latest ones. We do it for simplicity in
- # the first implementation. Loading the full obsstore has a
- # performance cost and should go away in this case too. We have
- # two simples options for that:
- #
- # 1) provide and API to start reading markers from a byte offset
- # (we have that data in the cache key)
- #
- # 2) directly update the cache at a lower level, in the code
- # responsible for adding a markers.
- #
- # Option 2 is probably a bit more invasive, but more solid on the long run
+            if startidx == 0:  # all markers
+                markers = repo.obsstore._all
+            else:
+                # the cached obsstore size ('_cachekey[3]') is the byte
+                # offset at which the new, unprocessed markers start
+                markers = markersfrom(repo.obsstore, self._cachekey[3], startidx)
+ self._updatemarkers(repo, markers)
+
+ self._cachekey = getcachekey(repo)
+
+    def _updaterevs(self, repo, revs):
+        """update the cache with new revisions
+
+        Newly added changesets might be affected by obsolescence markers
+        we already have locally, so we need some global knowledge about
+        the markers to answer that question.
+
+        Right now this requires parsing all markers in the obsstore. We
+        could imagine using various optimisations (e.g. another cache,
+        network exchange, etc.).
- for i in xrange(startidx, len(repo.obsstore)):
- r = rev(markers[i][0])
- # If markers affect a newly added nodes, it would have been
- # caught in the previous loop, (so we skip < startrev)
- if r is not None and (startrev is None or r < startrev):
- self._data[r] = 1
+        A possible approach is to build a set of all nodes used as
+        precursors in `obsstore._obscandidate` (see the '_scanprecursors'
+        sketch below). If markers are not loaded yet, a quick scan through
+        the raw obsstore data could fill a (pre-sized) set much faster
+        than parsing all the obsmarkers: less data accessed, no objects
+        created besides the nodes, and no complex data to decode.
- assert repo._currentlock(repo._lockref) is not None
- # XXX note that there are a potential race condition here, since the
- # repo "might" have changed side the cache update above. However, this
- # code will only be running in a lock so we ignore the issue for now.
- #
- # To work around this, 'upgradeneeded' should return a bounded amount
- # of changeset and markers to read with their associated cachekey. see
- # 'upgradeneeded' for detail.
- self._cachekey = getcachekey(repo)
+ For now we stick to the simpler approach of paying the
+ performance cost on new changesets.
+ """
+ node = repo.changelog.node
+ succs = repo.obsstore.successors
+ for r in revs:
+ if node(r) in succs:
+ val = 1
+ else:
+ val = 0
+ self._data.append(val)
+
+ def _updatemarkers(self, repo, obsmarkers):
+ """update the cache with new markers"""
+ rev = repo.changelog.nodemap.get
+ for m in obsmarkers:
+ r = rev(m[0])
+ if r is not None:
+ self._data[r] = 1
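+
+    # Illustrative sketch of the quick scan described in '_updaterevs'.
+    # Nothing calls it yet and the layout is an assumption: it relies on
+    # the version 1 on-disk format, where each marker starts with a
+    # '>IdhHBBB20s' fixed header (see 'mercurial.obsolete._fm1fixed')
+    # whose leading uint32 is the full marker size and whose trailing
+    # 20 bytes are the precursor node.
+    @staticmethod
+    def _scanprecursors(data, off=1):
+        """gather all precursor nodes without decoding full markers
+
+        'off' defaults to 1 to skip the one-byte version header.
+        """
+        fixed = struct.Struct('>IdhHBBB20s')
+        candidates = set()
+        while off + fixed.size <= len(data):
+            header = fixed.unpack_from(data, off)
+            candidates.add(header[-1])  # the precursor node
+            off += header[0]            # jump over the whole marker
+        return candidates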
def save(self, repo):
"""save the data to disk"""
@@ -339,7 +407,7 @@
if notpublic:
obscache = repo.obsstore.obscache
# Since we warm the cache at the end of every transaction, the cache
- # should be up to date. However a non-enabled client might have touced
+ # should be up to date. However a non-enabled client might have touched
# the repository.
#
# Updating the cache without a lock is sloppy, so we fallback to the
@@ -348,7 +416,7 @@
#
# With the current implementation updating the cache will requires to
# load the obsstore anyway. Once loaded, hitting the obsstore directly
- # will be about as fast..
+ # will be about as fast...
if not obscache.uptodate(repo):
if repo.currenttransaction() is None:
repo.ui.log('evoext-obscache',
@@ -357,8 +425,9 @@
repo.ui.debug('obscache is out of date')
return orig(repo)
else:
- # If a transaction is open, it is worthwhile to update and use the
- # cache as it will be written on disk when the transaction close.
+            # If a transaction is open, it is worthwhile to update and use
+            # the cache: the lock prevents races and the cache will be
+            # written to disk when the transaction closes.
obscache.update(repo)
isobs = obscache.get
for r in notpublic:
@@ -391,11 +460,10 @@
if repo is None:
return
repo = repo.unfiltered()
- # As pointed in 'obscache.update', we could have the
- # changelog and the obsstore in charge of updating the
- # cache when new items goes it. The tranaction logic would
- # then only be involved for the 'pending' and final saving
- # logic.
+        # As pointed out in 'obscache.update', we could have the changelog
+        # and the obsstore in charge of updating the cache when new
+        # items go in. The transaction logic would then only be
+        # involved for the 'pending' and final writing to disk.
self.obsstore.obscache.update(repo)
self.obsstore.obscache.save(repo)