obscache: return the new data along-side the upgrade needs (and cache key)
authorPierre-Yves David <pierre-yves.david@octobus.net>
Thu, 11 May 2017 16:45:13 +0200
changeset 2345 406c1a57b4ee
parent 2344 827d0f0a483f
child 2346 34c6382dbb82
obscache: return the new data along-side the upgrade needs (and cache key) Having one function returning consistent data will help prevent race.
hgext3rd/evolve/obscache.py
--- a/hgext3rd/evolve/obscache.py	Thu May 11 16:06:31 2017 +0200
+++ b/hgext3rd/evolve/obscache.py	Thu May 11 16:45:13 2017 +0200
@@ -99,37 +99,46 @@
                      up to date.
     """
 
-    # XXX ideally, this function would return a bounded amount of changeset and
-    # obsmarkers and the associated new cache key. Otherwise we are exposed to
-    # a race condition between the time the cache is updated and the new cache
-    # key is computed. (however, we do not want to compute the full new cache
-    # key in all case because we want to skip reading the obsstore content. We
-    # could have a smarter implementation here.
-    #
-    # In pratice the cache is only updated after each transaction within a
-    # lock. So we should be fine. We could enforce this with a new repository
-    # requirement (or fix the race, that is not too hard).
+    # XXX we need to ensure we use the same changelog and obsstore through the processing
+
+    reset = False
 
     status = _checkkey(repo, key)
     if status is None:
-        return (False, 0, 0)
+        reset = True
+        key = emptykey
+        obssize, obskey = repo.obsstore.cachekey()
+        tiprev = len(repo.changelog) - 1
+    else:
+        tiprev, obssize, obskey = status
 
     keytiprev, keytipnode, keyobslength, keyobssize, keyobskey = key
-    tiprev, obssize, __ = status
+
+    if not reset and keytiprev == tiprev and keyobssize == obssize:
+        return None # nothing to upgrade
 
     ### cache is valid, is there anything to update
 
     # any new changesets ?
-    startrev = None
+    revs = ()
     if keytiprev < tiprev:
-        startrev = keytiprev + 1
+        revs = list(repo.changelog.revs(start=keytiprev + 1, stop=tiprev))
 
     # any new markers
-    startidx = None
+    markers = ()
     if keyobssize < obssize:
-        startidx = keyobslength
+        # XXX Three are a small race change here. Since the obsstore might have
+        # move forward between the time we computed the cache key and we access
+        # the data. To fix this we need so "up to" argument when fetching the
+        # markers here. Otherwise we might return more markers than covered by
+        # the cache key.
+        #
+        # In pratice the cache is only updated after each transaction within a
+        # lock. So we should be fine. We could enforce this with a new repository
+        # requirement (or fix the race, that is not too hard).
+        markers = markersfrom(repo.obsstore, keyobssize, keyobslength)
 
-    return True, startrev, startidx
+    return reset, revs, markers, (obssize, obskey)
 
 def _checkkey(repo, key):
     """internal function"""
@@ -167,7 +176,9 @@
     return diskversion, obsolete.formats[diskversion][0](data, off)
 
 def markersfrom(obsstore, byteoffset, firstmarker):
-    if '_all' in vars(obsstore):
+    if not firstmarker:
+        return list(obsstore)
+    elif '_all' in vars(obsstore):
         # if the data are in memory, just use that
         return obsstore._all[firstmarker:]
     else:
@@ -253,47 +264,31 @@
         if self._cachekey is None:
             self.load(repo)
 
-        valid, startrev, startidx = upgradeneeded(repo, self._cachekey)
-        if not valid or self._cachekey is None:
-            self.clear(reset=True)
+        assert repo.filtername is None
+        cl = repo.changelog
 
-        if startrev is None and startidx is None:
+        upgrade = upgradeneeded(repo, self._cachekey)
+        if upgrade is None:
             return
 
-        # checks we never run 'update' without a lock
-        #
-        # There are a potential race condition otherwise, since the repo
-        # "might" have changed side the cache update above. However, this code
-        # will only be running in a lock so we ignore the issue for now.
-        #
-        # Lift this limitation, 'upgradeneeded' should return a bounded amount
-        # of changeset and markers to read with their associated cachekey. see
-        # 'upgradeneeded' for detail.
-        assert repo._currentlock(repo._lockref) is not None
+        reset, revs, obsmarkers, obskeypair = upgrade
+        if reset or self._cachekey is None:
+            self.clear(reset=True)
 
-        # process the new changesets
-        cl = repo.changelog
-        if startrev is not None:
-            self._updaterevs(repo, cl.revs(startrev))
-        assert len(self._data) == len(cl), (len(self._data), len(cl))
-
-        # process the new obsmarkers
-        if startidx is not None:
-            if startidx == 0: # all markers
-                markers = repo.obsstore._all
-            else:
-                markers = markersfrom(repo.obsstore, self._cachekey[3], startidx)
-            self._updatemarkers(repo, markers)
+        if revs:
+            self._updaterevs(repo, revs)
+            assert len(self._data) == len(cl), (len(self._data), len(cl))
+        if obsmarkers:
+            self._updatemarkers(repo, obsmarkers)
 
         # update the key from the new data
         key = list(self._cachekey)
-        if startrev is not None:
+        if revs:
             key[0] = len(cl) - 1
             key[1] = cl.node(key[0])
-        if startidx is not None:
-            key[2] += len(markers)
-            # XXX still a small race here if repo is not locked
-            key[3], key[4] = repo.obsstore.cachekey()
+        if obsmarkers:
+            key[2] += len(obsmarkers)
+            key[3], key[4] = obskeypair
         self._cachekey = tuple(key)
 
     def _updaterevs(self, repo, revs):