obshashrange: use the dualsourcecache as a base for the cache
authorPierre-Yves David <pierre-yves.david@octobus.net>
Fri, 12 May 2017 20:29:54 +0200
changeset 2360 89938bebc6f9
parent 2359 ff635fa59a25
child 2361 5c5f982b98f7
obshashrange: use the dualsourcecache as a base for the cache. This will easily open the way to an incrementally updated obshashrange cache. Small updates are needed to the database schema, so we bump the version. Currently the update function is not warming the cache (but it detects cases where it is invalidated).
hgext3rd/evolve/obsdiscovery.py
--- a/hgext3rd/evolve/obsdiscovery.py	Fri May 12 20:28:09 2017 +0200
+++ b/hgext3rd/evolve/obsdiscovery.py	Fri May 12 20:29:54 2017 +0200
@@ -46,6 +46,7 @@
 
 from . import (
     exthelper,
+    obscache,
     utility,
     stablerange,
 )
@@ -356,6 +357,7 @@
     """return the obsolete hash associated to a range"""
     cache = repo.obsstore.rangeobshashcache
     cl = repo.changelog
+    cache.update(repo)
     obshash = cache.get(rangeid)
     if obshash is not None:
         return obshash
@@ -392,10 +394,11 @@
 
 _sqliteschema = [
     """CREATE TABLE meta(schemaversion INTEGER NOT NULL,
+                         tiprev        INTEGER NOT NULL,
+                         tipnode       BLOB    NOT NULL,
                          nbobsmarker   INTEGER NOT NULL,
-                         obstipdata    BLOB    NOT NULL,
-                         tiprev        INTEGER NOT NULL,
-                         tipnode       BLOB    NOT NULL
+                         obssize       BLOB    NOT NULL,
+                         obskey        BLOB    NOT NULL
                         );""",
     """CREATE TABLE obshashrange(rev     INTEGER NOT NULL,
                                  idx     INTEGER NOT NULL,
@@ -404,31 +407,41 @@
     "CREATE INDEX range_index ON obshashrange(rev, idx);",
 ]
 _queryexist = "SELECT name FROM sqlite_master WHERE type='table' AND name='meta';"
-_newmeta = """INSERT INTO meta (schemaversion, nbobsmarker, obstipdata, tiprev, tipnode)
-            VALUES (?,?,?,?,?);"""
+_newmeta = """INSERT INTO meta (schemaversion, tiprev, tipnode, nbobsmarker, obssize, obskey)
+            VALUES (?,?,?,?,?, ?);"""
 _updateobshash = "INSERT INTO obshashrange(rev, idx, obshash) VALUES (?,?,?);"
-_querymeta = "SELECT schemaversion, nbobsmarker, obstipdata, tiprev, tipnode FROM meta;"
+_querymeta = "SELECT schemaversion, tiprev, tipnode, nbobsmarker, obssize, obskey FROM meta;"
 _queryobshash = "SELECT obshash FROM obshashrange WHERE (rev = ? AND idx = ?);"
 
-class _obshashcache(dict):
+_reset = "DELETE FROM obshashrange;"
 
-    _schemaversion = 0
+class _obshashcache(obscache.dualsourcecache, dict):
+
+    _schemaversion = 1
 
     def __init__(self, repo):
         super(_obshashcache, self).__init__()
-        self._path = repo.vfs.join('cache/evoext_obshashrange_v0.sqlite')
+        self._path = repo.vfs.join('cache/evoext_obshashrange_v1.sqlite')
         self._new = set()
         self._valid = True
         self._repo = weakref.ref(repo.unfiltered())
         # cache status
         self._ondiskcachekey = None
 
-    def clear(self):
+    def clear(self, reset=False):
         self._valid = False
-        super(_obshashcache, self).clear()
+        super(_obshashcache, self).clear(reset=reset)
+        if reset:
+            con = self._con
+            if con is not None:
+                con.execute(_reset)
         self._new.clear()
 
     def get(self, rangeid):
+        # revision should be covered by out tiprev
+        # XXX should be a programming error
+        assert rangeid[0] <= self._cachekey[0]
+
         value = super(_obshashcache, self).get(rangeid)
         if value is None and self._con is not None:
             nrange = (rangeid[0], rangeid[1])
@@ -441,16 +454,42 @@
         self._new.add(rangeid)
         super(_obshashcache, self).__setitem__(rangeid, obshash)
 
-    def _cachekey(self, repo):
-        # XXX for now the cache is very volatile, but this is still a win
-        nbobsmarker = len(repo.obsstore._all)
-        if nbobsmarker:
-            tipdata = obsolete._fm1encodeonemarker(repo.obsstore._all[-1])
-        else:
-            tipdata = node.nullid
-        tiprev = len(repo.changelog) - 1
-        tipnode = repo.changelog.node(tiprev)
-        return (self._schemaversion, nbobsmarker, tipdata, tiprev, tipnode)
+    def _updatefrom(self, repo, revs, obsmarkers):
+        """override this method to update your cache data incrementally
+
+        revs:      list of new revision in the changelog
+        obsmarker: list of new obsmarkers in the obsstore
+        """
+        # XXX for now, we'll not actually update the cache, but we'll be
+        # smarter at invalidating it.
+        #
+        # 1) new revisions does not get their entry updated (not update)
+        # 2) if we detect markers affecting non-new revision we reset the cache
+
+        revs = set(revs)
+        rev = repo.changelog.nodemap.get
+        # if we have a new markers affecting a node already covered by the
+        # cache, we must abort.
+        for m in obsmarkers:
+            # check successors and parent
+            for l in (m[1], m[5]):
+                if l is None:
+                    continue
+                for p in l:
+                    r = rev(p)
+                    if r is not None and r in revs:
+                        self.clear(reset=True)
+                        break
+
+    @property
+    def _fullcachekey(self):
+        return (self._schemaversion, ) + self._cachekey
+
+    def load(self, repo):
+        if self._con is None:
+            self._cachekey = self.emptykey
+            self._ondiskcachekey = self.emptykey
+        assert self._cachekey is not None
 
     @util.propertycache
     def _con(self):
@@ -459,7 +498,6 @@
         repo = self._repo()
         if repo is None:
             return None
-        cachekey = self._cachekey(repo)
         con = sqlite3.connect(self._path)
         con.text_factory = str
         cur = con.execute(_queryexist)
@@ -467,10 +505,10 @@
             self._valid = False
             return None
         meta = con.execute(_querymeta).fetchone()
-        if meta != cachekey:
+        if meta is None or meta[0] != self._schemaversion:
             self._valid = False
             return None
-        self._ondiskcachekey = meta
+        self._cachekey = self._ondiskcachekey = meta[1:]
         return con
 
     def save(self, repo):
@@ -499,26 +537,27 @@
                 for req in _sqliteschema:
                     con.execute(req)
 
-                con.execute(_newmeta, self._cachekey(repo))
+                con.execute(_newmeta, self._fullcachekey)
         else:
             con = self._con
             if self._ondiskcachekey is not None:
                 meta = con.execute(_querymeta).fetchone()
-                if meta != self._ondiskcachekey:
+                if meta[1:] != self._ondiskcachekey:
                     # drifting is currently an issue because this means another
                     # process might have already added the cache line we are about
                     # to add. This will confuse sqlite
                     msg = _('obshashrange cache: skipping write, '
                             'database drifted under my feet\n')
-                    data = (meta[2], meta[1], self._ondisktiprev, self._ondisktipnode)
+                    data = (meta[2], meta[1], self._ondiskcachekey[0], self._ondiskcachekey[1])
                     repo.ui.warn(msg)
+                    return
         data = ((rangeid[0], rangeid[1], self[rangeid]) for rangeid in self._new)
         con.executemany(_updateobshash, data)
-        cachekey = self._cachekey(repo)
+        cachekey = self._fullcachekey
         con.execute(_newmeta, cachekey)
         con.commit()
         self._new.clear()
-        self._ondiskcachekey = cachekey
+        self._ondiskcachekey = self._cachekey
 
 @eh.wrapfunction(obsolete.obsstore, '_addmarkers')
 def _addmarkers(orig, obsstore, *args, **kwargs):