# HG changeset patch # User Pierre-Yves David # Date 1494513913 -7200 # Node ID 406c1a57b4ee2bb3c098f8abecffbc6c6cdb5494 # Parent 827d0f0a483f68ade16e5438040cdd7e671da590 obscache: return the new data along-side the upgrade needs (and cache key) Having one function returning consistent data will help prevent race. diff -r 827d0f0a483f -r 406c1a57b4ee hgext3rd/evolve/obscache.py --- a/hgext3rd/evolve/obscache.py Thu May 11 16:06:31 2017 +0200 +++ b/hgext3rd/evolve/obscache.py Thu May 11 16:45:13 2017 +0200 @@ -99,37 +99,46 @@ up to date. """ - # XXX ideally, this function would return a bounded amount of changeset and - # obsmarkers and the associated new cache key. Otherwise we are exposed to - # a race condition between the time the cache is updated and the new cache - # key is computed. (however, we do not want to compute the full new cache - # key in all case because we want to skip reading the obsstore content. We - # could have a smarter implementation here. - # - # In pratice the cache is only updated after each transaction within a - # lock. So we should be fine. We could enforce this with a new repository - # requirement (or fix the race, that is not too hard). + # XXX we need to ensure we use the same changelog and obsstore through the processing + + reset = False status = _checkkey(repo, key) if status is None: - return (False, 0, 0) + reset = True + key = emptykey + obssize, obskey = repo.obsstore.cachekey() + tiprev = len(repo.changelog) - 1 + else: + tiprev, obssize, obskey = status keytiprev, keytipnode, keyobslength, keyobssize, keyobskey = key - tiprev, obssize, __ = status + + if not reset and keytiprev == tiprev and keyobssize == obssize: + return None # nothing to upgrade ### cache is valid, is there anything to update # any new changesets ? - startrev = None + revs = () if keytiprev < tiprev: - startrev = keytiprev + 1 + revs = list(repo.changelog.revs(start=keytiprev + 1, stop=tiprev)) # any new markers - startidx = None + markers = () if keyobssize < obssize: - startidx = keyobslength + # XXX Three are a small race change here. Since the obsstore might have + # move forward between the time we computed the cache key and we access + # the data. To fix this we need so "up to" argument when fetching the + # markers here. Otherwise we might return more markers than covered by + # the cache key. + # + # In pratice the cache is only updated after each transaction within a + # lock. So we should be fine. We could enforce this with a new repository + # requirement (or fix the race, that is not too hard). + markers = markersfrom(repo.obsstore, keyobssize, keyobslength) - return True, startrev, startidx + return reset, revs, markers, (obssize, obskey) def _checkkey(repo, key): """internal function""" @@ -167,7 +176,9 @@ return diskversion, obsolete.formats[diskversion][0](data, off) def markersfrom(obsstore, byteoffset, firstmarker): - if '_all' in vars(obsstore): + if not firstmarker: + return list(obsstore) + elif '_all' in vars(obsstore): # if the data are in memory, just use that return obsstore._all[firstmarker:] else: @@ -253,47 +264,31 @@ if self._cachekey is None: self.load(repo) - valid, startrev, startidx = upgradeneeded(repo, self._cachekey) - if not valid or self._cachekey is None: - self.clear(reset=True) + assert repo.filtername is None + cl = repo.changelog - if startrev is None and startidx is None: + upgrade = upgradeneeded(repo, self._cachekey) + if upgrade is None: return - # checks we never run 'update' without a lock - # - # There are a potential race condition otherwise, since the repo - # "might" have changed side the cache update above. However, this code - # will only be running in a lock so we ignore the issue for now. - # - # Lift this limitation, 'upgradeneeded' should return a bounded amount - # of changeset and markers to read with their associated cachekey. see - # 'upgradeneeded' for detail. - assert repo._currentlock(repo._lockref) is not None + reset, revs, obsmarkers, obskeypair = upgrade + if reset or self._cachekey is None: + self.clear(reset=True) - # process the new changesets - cl = repo.changelog - if startrev is not None: - self._updaterevs(repo, cl.revs(startrev)) - assert len(self._data) == len(cl), (len(self._data), len(cl)) - - # process the new obsmarkers - if startidx is not None: - if startidx == 0: # all markers - markers = repo.obsstore._all - else: - markers = markersfrom(repo.obsstore, self._cachekey[3], startidx) - self._updatemarkers(repo, markers) + if revs: + self._updaterevs(repo, revs) + assert len(self._data) == len(cl), (len(self._data), len(cl)) + if obsmarkers: + self._updatemarkers(repo, obsmarkers) # update the key from the new data key = list(self._cachekey) - if startrev is not None: + if revs: key[0] = len(cl) - 1 key[1] = cl.node(key[0]) - if startidx is not None: - key[2] += len(markers) - # XXX still a small race here if repo is not locked - key[3], key[4] = repo.obsstore.cachekey() + if obsmarkers: + key[2] += len(obsmarkers) + key[3], key[4] = obskeypair self._cachekey = tuple(key) def _updaterevs(self, repo, revs):