obshashrange: introduce basic sqlite caching
authorPierre-Yves David <pierre-yves.david@ens-lyon.org>
Fri, 24 Mar 2017 18:28:01 +0100
changeset 2241 88aace74b190
parent 2240 ecb993892c61
child 2242 128923ff68c8
obshashrange: introduce basic sqlite caching. Same as for stablerange, this is far from perfect but already a win. The cache is currently extremely volatile, but that will still be a win when doing multiple consecutive requests during discovery. Better cache invalidation will happen "in the future".
hgext3rd/evolve/obsdiscovery.py
--- a/hgext3rd/evolve/obsdiscovery.py	Fri Mar 24 15:57:54 2017 +0100
+++ b/hgext3rd/evolve/obsdiscovery.py	Fri Mar 24 18:28:01 2017 +0100
@@ -24,7 +24,9 @@
 
 import hashlib
 import heapq
+import sqlite3
 import struct
+import weakref
 
 from mercurial import (
     bundle2,
@@ -321,6 +323,7 @@
         querycount += 1
         ui.progress(_("comparing obsmarker with other"), querycount)
     ui.progress(_("comparing obsmarker with other"), None)
+    local.obsstore.rangeobshashcache.save(local)
     return sorted(missing)
 
 def _queryrange(ui, repo, remote, allentries):
@@ -371,6 +374,7 @@
         rhash = _obshashrange(op.repo, (r, index))
         replies.append(data + rhash)
         data = inpart.read(24)
+    op.repo.obsstore.rangeobshashcache.save(op.repo)
     op.reply.newpart('reply:_donotusemeever_evoext_obshashrange_1', data=iter(replies))
 
 
@@ -400,7 +404,8 @@
     s = node.short
     revs = scmutil.revrange(repo, opts['rev'])
     # prewarm depth cache
-    repo.stablerange.warmup(repo, max(revs))
+    if revs:
+        repo.stablerange.warmup(repo, max(revs))
     cl = repo.changelog
     rangelength = repo.stablerange.rangelength
     depthrev = repo.stablerange.depthrev
@@ -420,6 +425,7 @@
              depthrev(repo, r[0]),
              node.short(_obshashrange(repo, r)))
         ui.status(linetemplate % d)
+    repo.obsstore.rangeobshashcache.save(repo)
 
 def _obshashrange(repo, rangeid):
     """return the obsolete hash associated to a range"""
@@ -457,15 +463,160 @@
     cache[rangeid] = obshash
     return obshash
 
+### sqlite caching
+
+_sqliteschema = [
+    """CREATE TABLE meta(schemaversion INTEGER NOT NULL,
+                         nbobsmarker   INTEGER NOT NULL,
+                         obstipdata    BLOB    NOT NULL,
+                         tiprev        INTEGER NOT NULL,
+                         tipnode       BLOB    NOT NULL
+                        );""",
+    """CREATE TABLE obshashrange(rev     INTEGER NOT NULL,
+                                 idx     INTEGER NOT NULL,
+                                 obshash BLOB    NOT NULL,
+                                 PRIMARY KEY(rev, idx));""",
+    "CREATE INDEX range_index ON obshashrange(rev, idx);",
+]
+_queryexist = "SELECT name FROM sqlite_master WHERE type='table' AND name='meta';"
+_newmeta = """INSERT INTO meta (schemaversion, nbobsmarker, obstipdata, tiprev, tipnode)
+            VALUES (?,?,?,?,?);"""
+_updateobshash = "INSERT INTO obshashrange(rev, idx, obshash) VALUES (?,?,?);"
+_querymeta = "SELECT schemaversion, nbobsmarker, obstipdata, tiprev, tipnode FROM meta;"
+_queryobshash = "SELECT obshash FROM obshashrange WHERE (rev = ? AND idx = ?);"
+
+class _obshashcache(dict):
+
+    _schemaversion = 0
+
+    def __init__(self, repo):
+        super(_obshashcache, self).__init__()
+        self._path = repo.vfs.join('cache/evoext_obshashrange_v0.sqlite')
+        self._new = set()
+        self._valid = True
+        self._repo = weakref.ref(repo.unfiltered())
+        # cache status
+        self._ondiskcachekey = None
+
+    def clear(self):
+        self._valid = False
+        super(_obshashcache, self).clear()
+        self._new.clear()
+
+    def get(self, rangeid):
+        value = super(_obshashcache, self).get(rangeid)
+        if value is None and self._con is not None:
+            nrange = (rangeid[0], rangeid[1])
+            obshash = self._con.execute(_queryobshash, nrange).fetchone()
+            if obshash is not None:
+                value = obshash[0]
+        return value
+
+    def __setitem__(self, rangeid, obshash):
+        self._new.add(rangeid)
+        super(_obshashcache, self).__setitem__(rangeid, obshash)
+
+    def _cachekey(self, repo):
+        # XXX for now the cache is very volatile, but this is still a win
+        nbobsmarker = len(repo.obsstore._all)
+        if nbobsmarker:
+            tipdata = obsolete._fm1encodeonemarker(repo.obsstore._all[-1])
+        else:
+            tipdata = node.nullid
+        tiprev = len(repo.changelog) - 1
+        tipnode = repo.changelog.node(tiprev)
+        return (self._schemaversion, nbobsmarker, tipdata, tiprev, tipnode)
+
+    @util.propertycache
+    def _con(self):
+        if not self._valid:
+            return None
+        repo = self._repo()
+        if repo is None:
+            return None
+        cachekey = self._cachekey(repo)
+        con = sqlite3.connect(self._path)
+        con.text_factory = str
+        cur = con.execute(_queryexist)
+        if cur.fetchone() is None:
+            self._valid = False
+            return None
+        meta = con.execute(_querymeta).fetchone()
+        if meta != cachekey:
+            self._valid = False
+            return None
+        self._ondiskcachekey = meta
+        return con
+
+    def save(self, repo):
+        repo = repo.unfiltered()
+        try:
+            if not self._new:
+                return
+            with repo.lock():
+                self._save(repo)
+        except error.LockError:
+            # Exceptionnally we are noisy about it since performance impact
+            # is large We should address that before using this more
+            # widely.
+            msg = _('obshashrange cache: skipping save unable to lock repo\n')
+            repo.ui.warn(msg)
+
+    def _save(self, repo):
+        if self._con is None:
+            util.unlinkpath(self._path, ignoremissing=True)
+            if '_con' in vars(self):
+                del self._con
+
+            con = sqlite3.connect(self._path)
+            con.text_factory = str
+            with con:
+                for req in _sqliteschema:
+                    con.execute(req)
+
+                con.execute(_newmeta, self._cachekey(repo))
+        else:
+            con = self._con
+            if self._ondiskcachekey is not None:
+                meta = con.execute(_querymeta).fetchone()
+                if meta != self._ondiskcachekey:
+                    # drifting is currently an issue because this means another
+                    # process might have already added the cache line we are about
+                    # to add. This will confuse sqlite
+                    msg = _('obshashrange cache: skipping write, '
+                            'database drifted under my feet\n')
+                    data = (meta[2], meta[1], self._ondisktiprev, self._ondisktipnode)
+                    repo.ui.warn(msg)
+        data = ((rangeid[0], rangeid[1], self[rangeid]) for rangeid in self._new)
+        con.executemany(_updateobshash, data)
+        cachekey = self._cachekey(repo)
+        con.execute(_newmeta, cachekey)
+        con.commit()
+        self._new.clear()
+        self._ondiskcachekey = cachekey
+
 @eh.wrapfunction(obsolete.obsstore, '_addmarkers')
 def _addmarkers(orig, obsstore, *args, **kwargs):
     obsstore.rangeobshashcache.clear()
     return orig(obsstore, *args, **kwargs)
 
-@eh.addattr(obsolete.obsstore, 'rangeobshashcache')
-@util.propertycache
-def rangeobshashcache(obsstore):
-    return {}
+# obsstore is a filecache so we have to do some special dancing
+@eh.wrapfunction(localrepo.localrepository.obsstore, 'func')
+def obsstorewithcache(orig, repo):
+    obsstore = orig(repo)
+    obsstore.rangeobshashcache = _obshashcache(repo.unfiltered())
+    return obsstore
+
+@eh.reposetup
+def setupcache(ui, repo):
+
+    class obshashrepo(repo.__class__):
+        @localrepo.unfilteredmethod
+        def destroyed(self):
+            if 'stablerange' in vars(self):
+                del self.stablerange
+
+    repo.__class__ = obshashrepo
 
 #############################
 ### Tree Hash computation ###