cache: introduce an abstract class for caches we can upgrade incrementally
author Pierre-Yves David <pierre-yves.david@octobus.net>
Wed, 22 Nov 2017 13:40:05 +0100
changeset 3237 9a2cc4687cb9
parent 3236 7c78b0c482a1
child 3238 88f11b9881b2
cache: introduce an abstract class for caches we can upgrade incrementally

Right now, each cache class implements its own mechanism for validation and update. We start introducing an abstract class to ultimately allow more unification of the cache code.

The end goal of this series is to introduce a cache for some obsolescence property, not to actually implement that cache here. However, taking advantage of adding a new cache to introduce the abstract class seems like a win.

This code was headed for core during the 4.3 cycle but never made it there, so we start with a copy in the evolve repository.
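To make the intended use concrete, here is a rough sketch of how a concrete cache could plug into the abstract class added in the diff below. Everything specific to the example is made up: the changesetcountcache name, the counted value, and the (tip revision, tip node) key layout are illustrative assumptions, not part of this changeset; only the attributes and abstract methods (emptykey, _cachekeyspec, _cachename, __init__, clear, load, _fetchupdatedata, _updatefrom) come from genericcaches.py.

from mercurial import node

from hgext3rd.evolve import genericcaches

class changesetcountcache(genericcaches.incrementalcachebase):
    """hypothetical cache counting changesets, keyed on the changelog tip"""

    # cache key is (tip revision, tip node); see _cachekeyspec below
    emptykey = (-1, node.nullid)
    _cachekeyspec = 'q20s'           # becomes struct.Struct('>q20s') in the base class
    _cachename = 'changeset-count'   # shows up in the ui.log() messages

    def __init__(self):
        super(changesetcountcache, self).__init__()
        self.count = 0

    def clear(self, reset=False):
        super(changesetcountcache, self).clear(reset=reset)
        self.count = 0

    def load(self, repo):
        # a real cache would read _cachekey and count back from a file under
        # .hg/cache/; this sketch simply starts over from an empty cache
        self.clear(reset=True)

    def _fetchupdatedata(self, repo):
        cl = repo.changelog
        tiprev = len(cl) - 1
        tipnode = cl.node(tiprev) if tiprev >= 0 else node.nullid
        newkey = (tiprev, tipnode)
        reset = False
        start = 0
        if self._cachekey is not None and self._cachekey != self.emptykey:
            oldrev, oldnode = self._cachekey
            if oldrev <= tiprev and cl.node(oldrev) == oldnode:
                start = oldrev + 1    # source only grew, update incrementally
            else:
                reset = True          # strip detected, recompute from scratch
        return reset, range(start, tiprev + 1), newkey

    def _updatefrom(self, repo, data):
        # <data> is the range of revisions not covered by the previous key
        self.count += len(data)

With such a subclass, callers only ever invoke update(repo); the base class decides whether an incremental update or a full recomputation is needed.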
hgext3rd/evolve/genericcaches.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hgext3rd/evolve/genericcaches.py	Wed Nov 22 13:40:05 2017 +0100
@@ -0,0 +1,127 @@
+# genericcaches.py - utilities for caching
+#
+# Copyright 2017 Octobus SAS <contact@octobus.net>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+from __future__ import absolute_import
+
+import abc
+import struct
+
+from mercurial import (
+    util,
+)
+
+class incrementalcachebase(object):
+    """base class for incremental cache from append only source
+
+    There are multiple append only data source we might want to cache
+    computation from. One of the common pattern is to track the state of the
+    file and update the cache with the extra data (eg: branchmap-cache tracking
+    changelog). This pattern also needs to detect when a the source is striped
+
+    The overall pattern is similar whatever the actual source is. This class
+    introduces the basic patterns.
+    """
+
+    __metaclass__ = abc.ABCMeta
+
+    # default key used for an empty cache
+    emptykey = ()
+
+    _cachekeyspec = '' # used for serialization
+    _cachename = None # used for debug messages
+
+    @abc.abstractmethod
+    def __init__(self):
+        super(incrementalcachebase, self).__init__()
+        self._cachekey = None
+
+    @util.propertycache
+    def _cachekeystruct(self):
+        # dynamic property, so subclasses can easily change the key spec
+        return struct.Struct('>' + self._cachekeyspec)
+
+    @util.propertycache
+    def _cachekeysize(self):
+        # dynamic property, so subclasses can easily change the key spec
+        return self._cachekeystruct.size
+
+    @abc.abstractmethod
+    def _updatefrom(self, repo, data):
+        """override this method to update you date from incrementally read data.
+
+        Content of <data> will depends of the sources.
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def clear(self, reset=False):
+        """invalidate the cache content
+
+        If 'reset' is True, a strip was detected and the cache will have to be
+        recomputed.
+
+        Subclasses MUST override this method to actually affect the cache data.
+        """
+        if reset:
+            self._cachekey = self.emptykey
+        else:
+            self._cachekey = None
+
+    @abc.abstractmethod
+    def load(self, repo):
+        """Load data from disk
+
+        Subclasses MUST restore the "_cachekey" attribute while doing so.
+        """
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def _fetchupdatedata(self, repo):
+        """Check the source for possible changes and return necessary data
+
+        The return is a three-element tuple: (reset, data, cachekey)
+
+        * reset: `True` when a strip is detected and the cache needs to be reset
+        * data: new data to take into account, actual type depends on the source
+        * cachekey: the cache key covering <data> and previously covered data
+        """
+        raise NotImplementedError
+
+    # Useful "public" function (no need to override them)
+
+    def update(self, repo):
+        """update the cache with new repository data
+
+        The update will be incremental when possible."""
+        repo = repo.unfiltered()
+
+        # If we do not have any data, try loading from disk
+        if self._cachekey is None:
+            self.load(repo)
+
+        reset, data, newkey = self._fetchupdatedata(repo)
+        if newkey == self._cachekey:
+            return
+        if reset or self._cachekey is None:
+            repo.ui.log('cache', 'strip detected, %s cache reset\n'
+                        % self._cachename)
+            self.clear(reset=True)
+
+        starttime = util.timer()
+        self._updatefrom(repo, data)
+        duration = util.timer() - starttime
+        repo.ui.log('cache', 'updated %s in %.4f seconds\n',
+                    self._cachename, duration)
+
+        self._cachekey = newkey
+
+    def _serializecachekey(self):
+        """provide a bytes version of the cachekey"""
+        return self._cachekeystruct.pack(*self._cachekey)
+
+    def _deserializecachekey(self, data):
+        """read the cachekey from bytes"""
+        return self._cachekeystruct.unpack(data)
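As a small illustration of the serialization helpers at the end of the file, here is the round trip they perform, assuming the hypothetical 'q20s' key spec from the sketch above (a 64-bit tip revision followed by a 20-byte tip node); the '>' big-endian prefix is added by _cachekeystruct.

import struct

from mercurial import node

keystruct = struct.Struct('>' + 'q20s')    # what _cachekeystruct builds
cachekey = (42, node.nullid)               # (tip revision, tip node)

data = keystruct.pack(*cachekey)           # what _serializecachekey returns
assert len(data) == keystruct.size         # 8 + 20 = 28 bytes on disk
assert keystruct.unpack(data) == cachekey  # what _deserializecachekey returns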