obscache: extract a data agnostic class
author Pierre-Yves David <pierre-yves.david@octobus.net>
Fri, 12 May 2017 19:18:49 +0200
changeset 2357 f787f5406a98
parent 2356 d5de0529a48f
child 2358 d8ea60fcfbc9
obscache: extract a data agnostic class. We now have an independent class that we can reuse for other purposes (e.g. the obshashrange cache).
hgext3rd/evolve/obscache.py
--- a/hgext3rd/evolve/obscache.py	Fri May 12 19:07:14 2017 +0200
+++ b/hgext3rd/evolve/obscache.py	Fri May 12 19:18:49 2017 +0200
@@ -117,72 +117,97 @@
         obsdata = obsstore.svfs.tryread('obsstore')
         return _readmarkers(obsdata, byteoffset)[1]
 
-class obscache(object):
-    """cache the "does a rev" is the precursors of some obsmarkers data
 
-    This is not directly holding the "is this revision obsolete" information,
-    because phases data gets into play here. However, it allow to compute the
-    "obsolescence" set without reading the obsstore content.
-
-    Implementation note #1:
-
-      The obsstore is implementing only half of the transaction logic it
-      should. It properly record the starting point of the obsstore to allow
-      clean rollback. However it still write to the obsstore file directly
-      during the transaction. Instead it should be keeping data in memory and
-      write to a '.pending' file to make the data vailable for hooks.
+class dualsourcecache(object):
+    """An abstract class for caches that need both changelog and obsstore
 
-      This cache is not going futher than what the obstore is doing, so it does
-      not has any '.pending' logic. When the obsstore gains proper '.pending'
-      support, adding it to this cache should not be too hard. As the flag
-      always move from 0 to 1, we could have a second '.pending' cache file to
-      be read. If flag is set in any of them, the value is 1. For the same
-      reason, updating the file in place should be possible.
-
-    Implementation note #2:
-
-      Instead of having a large final update run, we could update this cache at
-      the level adding a new changeset or a new obsmarkers. More on this in the
-      'update code'.
-
-    Implementation note #3:
-
-        Storage-wise, we could have a "start rev" to avoid storing useless
-        zero. That would be especially useful for the '.pending' overlay.
+    This class handles the tracking of changelog and obsstore updates. It
+    provides the data needed to perform incremental updates (see the
+    '_updatefrom' function for details).  This class can also detect stripping
+    of the changelog or the obsstore and reset the cached data accordingly (see
+    the 'clear' function for details).
     """
 
-    _filepath = 'cache/evoext-obscache-00'
-    _headerformat = '>q20sQQ20s'
-
+    # default key used for an empty cache
+    #
+    # The cache key covering the changesets and obsmarkers content
+    #
+    # The cache key parts are:
+    # - tip-rev,
+    # - tip-node,
+    # - obsstore-length (nb markers),
+    # - obsstore-file-size (in bytes),
+    # - obsstore "cache key"
     emptykey = (node.nullrev, node.nullid, 0, 0, node.nullid)
 
-    def __init__(self, repo):
-        self._vfs = repo.vfs
-        # cache key covering the changesets and obsmarkers content
-        #
-        # It contains the following data. Combined with 'upgradeneeded' it allows to
-        # do iterative upgrade for cache depending of theses two pieces of data.
-        #
-        # The cache key parts are"
-        # - tip-rev,
-        # - tip-node,
-        # - obsstore-length (nb markers),
-        # - obsstore-file-size (in bytes),
-        # - obsstore "cache key"
+    def __init__(self):
         self._cachekey = None
-        self._ondiskkey = None
-        self._data = bytearray()
+
+    def _updatefrom(self, repo, revs, obsmarkers):
+        """override this method to update your cache data incrementally
 
-    def get(self, rev):
-        """return True if "rev" is used as "precursors for any obsmarkers
-
-        Make sure the cache has been updated to match the repository content before using it"""
-        return self._data[rev]
+        revs:       list of new revisions in the changelog
+        obsmarkers: list of new obsmarkers in the obsstore
+        """
+        raise NotImplementedError
 
     def clear(self, reset=False):
-        """invalidate the cache content"""
+        """invalidate the cache content
+
+        if 'reset' is passed, we detected a strip and the cache will have to be
+        recomputed.
+        """
+        # /!\ IMPORTANT /!\
+        # You must override this method to actually clear your cached data
         self._cachekey = self.emptykey if reset else None
-        self._data = bytearray()
+
+    # Useful public functions (no need to override them)
+
+    def uptodate(self, repo):
+        """return True if the cache content is up to date False otherwise
+
+        This method can be used to detect if the cache is lagging behind new
+        data in either changelog or obsstore.
+        """
+        if self._cachekey is None:
+            self.load(repo)
+        status = self._checkkey(repo.changelog, repo.obsstore)
+        return (status is not None
+                and status[0] == self._cachekey[0] # tiprev
+                and status[1] == self._cachekey[3]) # obssize
+
+    def update(self, repo):
+        """update the cache with new repository data
+
+        The update will be incremental when possible"""
+        # If we do not have any data, try loading from disk
+        if self._cachekey is None:
+            self.load(repo)
+
+        assert repo.filtername is None
+        cl = repo.changelog
+
+        upgrade = self._upgradeneeded(repo)
+        if upgrade is None:
+            return
+
+        reset, revs, obsmarkers, obskeypair = upgrade
+        if reset or self._cachekey is None:
+            self.clear(reset=True)
+
+        self._updatefrom(repo, revs, obsmarkers)
+
+        # update the key from the new data
+        key = list(self._cachekey)
+        if revs:
+            key[0] = len(cl) - 1
+            key[1] = cl.node(key[0])
+        if obsmarkers:
+            key[2] += len(obsmarkers)
+            key[3], key[4] = obskeypair
+        self._cachekey = tuple(key)
+
+    # from here on, there are internal functions only
 
     def _checkkey(self, changelog, obsstore):
         """internal function"""
@@ -203,15 +228,7 @@
             return None
         return tiprev, obssize, obskey
 
-    def uptodate(self, repo):
-        if self._cachekey is None:
-            self.load(repo)
-        status = self._checkkey(repo.changelog, repo.obsstore)
-        return (status is not None
-                and status[0] == self._cachekey[0] # tiprev
-                and status[1] == self._cachekey[3]) # obssize
-
-    def upgradeneeded(self, repo):
+    def _upgradeneeded(self, repo):
         """return (valid, start-rev, start-obs-idx)
 
         'valid': is "False" if older cache value needs invalidation,
@@ -268,34 +285,62 @@
 
         return reset, revs, markers, (obssize, obskey)
 
-    def update(self, repo):
-        """Iteratively update the cache with new repository data"""
-        # If we do not have any data, try loading from disk
-        if self._cachekey is None:
-            self.load(repo)
+
+class obscache(dualsourcecache):
+    """cache the "does a rev" is the precursors of some obsmarkers data
+
+    This is not directly holding the "is this revision obsolete" information,
+    because phases data gets into play here. However, it allow to compute the
+    "obsolescence" set without reading the obsstore content.
+
+    Implementation note #1:
 
-        assert repo.filtername is None
-        cl = repo.changelog
+      The obsstore is implementing only half of the transaction logic it
+      should. It properly record the starting point of the obsstore to allow
+      clean rollback. However it still write to the obsstore file directly
+      during the transaction. Instead it should be keeping data in memory and
+      write to a '.pending' file to make the data available for hooks.
 
-        upgrade = self.upgradeneeded(repo)
-        if upgrade is None:
-            return
+      This cache is not going further than what the obsstore is doing, so it does
+      not have any '.pending' logic. When the obsstore gains proper '.pending'
+      support, adding it to this cache should not be too hard. As the flag
+      always move from 0 to 1, we could have a second '.pending' cache file to
+      be read. If flag is set in any of them, the value is 1. For the same
+      reason, updating the file in place should be possible.
+
+    Implementation note #2:
 
-        reset, revs, obsmarkers, obskeypair = upgrade
-        if reset or self._cachekey is None:
-            self.clear(reset=True)
+      Instead of having a large final update run, we could update this cache at
+      the level adding a new changeset or a new obsmarkers. More on this in the
+      'update code'.
+
+    Implementation note #3:
 
-        def _updatefrom(repo, revs, obsmarkers)
+        Storage-wise, we could have a "start rev" to avoid storing useless
+        zero. That would be especially useful for the '.pending' overlay.
+    """
+
+    _filepath = 'cache/evoext-obscache-00'
+    _headerformat = '>q20sQQ20s'
+
+    emptykey = (node.nullrev, node.nullid, 0, 0, node.nullid)
 
-        # update the key from the new data
-        key = list(self._cachekey)
-        if revs:
-            key[0] = len(cl) - 1
-            key[1] = cl.node(key[0])
-        if obsmarkers:
-            key[2] += len(obsmarkers)
-            key[3], key[4] = obskeypair
-        self._cachekey = tuple(key)
+    def __init__(self, repo):
+        super(obscache, self).__init__()
+        self._ondiskkey = None
+        self._vfs = repo.vfs
+        self._data = bytearray()
+
+    def get(self, rev):
+        """return True if "rev" is used as "precursors for any obsmarkers
+
+        Make sure the cache has been updated to match the repository content before using it"""
+        return self._data[rev]
+
+    def clear(self, reset=False):
+        """invalidate the cache content"""
+        super(obscache, self).clear(reset=reset)
+        self._data = bytearray()
 
     def _updatefrom(self, repo, revs, obsmarkers):
         if revs: