# HG changeset patch # User Pierre-Yves David # Date 1494609529 -7200 # Node ID f787f5406a98a2df2971decb78df538eab141f03 # Parent d5de0529a48f95a00cae40db224f01a8eff65bde obscache: extract a data agnostic class We now have an independent class that we can reuse for other purpose (eg: obshashrange cache) diff -r d5de0529a48f -r f787f5406a98 hgext3rd/evolve/obscache.py --- a/hgext3rd/evolve/obscache.py Fri May 12 19:07:14 2017 +0200 +++ b/hgext3rd/evolve/obscache.py Fri May 12 19:18:49 2017 +0200 @@ -117,72 +117,97 @@ obsdata = obsstore.svfs.tryread('obsstore') return _readmarkers(obsdata, byteoffset)[1] -class obscache(object): - """cache the "does a rev" is the precursors of some obsmarkers data - This is not directly holding the "is this revision obsolete" information, - because phases data gets into play here. However, it allow to compute the - "obsolescence" set without reading the obsstore content. - - Implementation note #1: - - The obsstore is implementing only half of the transaction logic it - should. It properly record the starting point of the obsstore to allow - clean rollback. However it still write to the obsstore file directly - during the transaction. Instead it should be keeping data in memory and - write to a '.pending' file to make the data vailable for hooks. +class dualsourcecache(object): + """An abstract class for cache that needs both changelog and obsstore - This cache is not going futher than what the obstore is doing, so it does - not has any '.pending' logic. When the obsstore gains proper '.pending' - support, adding it to this cache should not be too hard. As the flag - always move from 0 to 1, we could have a second '.pending' cache file to - be read. If flag is set in any of them, the value is 1. For the same - reason, updating the file in place should be possible. - - Implementation note #2: - - Instead of having a large final update run, we could update this cache at - the level adding a new changeset or a new obsmarkers. 
More on this in the - 'update code'. - - Implementation note #3: - - Storage-wise, we could have a "start rev" to avoid storing useless - zero. That would be especially useful for the '.pending' overlay. + This class handles the tracking of changelog and obsstore updates. It provides + the data needed to perform incremental updates (see the '_updatefrom' method for + details). This class can also detect stripping of the changelog or the + obsstore and can reset the cache in that case (see the 'clear' method + for details). """ - _filepath = 'cache/evoext-obscache-00' - _headerformat = '>q20sQQ20s' - + # default key used for an empty cache + # + # The cache key covers the changesets and obsmarkers content + # + # The cache key parts are: + # - tip-rev, + # - tip-node, + # - obsstore-length (nb markers), + # - obsstore-file-size (in bytes), + # - obsstore "cache key" emptykey = (node.nullrev, node.nullid, 0, 0, node.nullid) - def __init__(self, repo): - self._vfs = repo.vfs - # cache key covering the changesets and obsmarkers content - # - # It contains the following data. Combined with 'upgradeneeded' it allows to - # do iterative upgrade for cache depending of theses two pieces of data.
- # - # The cache key parts are" - # - tip-rev, - # - tip-node, - # - obsstore-length (nb markers), - # - obsstore-file-size (in bytes), - # - obsstore "cache key" + def __init__(self): self._cachekey = None - self._ondiskkey = None - self._data = bytearray() + + def _updatefrom(self, repo, revs, obsmarkers): + """override this method to update your cache data incrementally - def get(self, rev): - """return True if "rev" is used as "precursors for any obsmarkers - - Make sure the cache has been updated to match the repository content before using it""" - return self._data[rev] + revs: list of new revisions in the changelog + obsmarkers: list of new obsmarkers in the obsstore + """ + raise NotImplementedError def clear(self, reset=False): - """invalidate the cache content""" + """invalidate the cache content + + if 'reset' is passed, we detected a strip and the cache will have to be + recomputed. + """ + # /!\ IMPORTANT /!\ + # You must override this method to actually clear your cache data self._cachekey = self.emptykey if reset else None - self._data = bytearray() + + # Useful public functions (no need to override them) + + def uptodate(self, repo): + """return True if the cache content is up to date, False otherwise + + This method can be used to detect if the cache is lagging behind new + data in either changelog or obsstore.
+ """ + if self._cachekey is None: + self.load(repo) + status = self._checkkey(repo.changelog, repo.obsstore) + return (status is not None + and status[0] == self._cachekey[0] # tiprev + and status[1] == self._cachekey[3]) # obssize + + def update(self, repo): + """update the cache with new repository data + + The update will be incremental when possible""" + # If we do not have any data, try loading from disk + if self._cachekey is None: + self.load(repo) + + assert repo.filtername is None + cl = repo.changelog + + upgrade = self._upgradeneeded(repo) + if upgrade is None: + return + + reset, revs, obsmarkers, obskeypair = upgrade + if reset or self._cachekey is None: + self.clear(reset=True) + + self._updatefrom(repo, revs, obsmarkers) + + # update the key from the new data + key = list(self._cachekey) + if revs: + key[0] = len(cl) - 1 + key[1] = cl.node(key[0]) + if obsmarkers: + key[2] += len(obsmarkers) + key[3], key[4] = obskeypair + self._cachekey = tuple(key) + + # from here on, there are internal functions only def _checkkey(self, changelog, obsstore): """internal function""" @@ -203,15 +228,7 @@ return None return tiprev, obssize, obskey - def uptodate(self, repo): - if self._cachekey is None: - self.load(repo) - status = self._checkkey(repo.changelog, repo.obsstore) - return (status is not None - and status[0] == self._cachekey[0] # tiprev - and status[1] == self._cachekey[3]) # obssize - - def upgradeneeded(self, repo): + def _upgradeneeded(self, repo): """return (valid, start-rev, start-obs-idx) 'valid': is "False" if older cache value needs invalidation, @@ -268,34 +285,62 @@ return reset, revs, markers, (obssize, obskey) - def update(self, repo): - """Iteratively update the cache with new repository data""" - # If we do not have any data, try loading from disk - if self._cachekey is None: - self.load(repo) + +class obscache(dualsourcecache): + """cache the "is a rev the precursor of some obsmarkers" data + + This is not directly holding the "is this
revision obsolete" information, + because phase data comes into play here. However, it allows computing the + "obsolescence" set without reading the obsstore content. + + Implementation note #1: - assert repo.filtername is None - cl = repo.changelog + The obsstore is implementing only half of the transaction logic it + should. It properly records the starting point of the obsstore to allow + clean rollback. However, it still writes to the obsstore file directly + during the transaction. Instead it should be keeping data in memory and + write to a '.pending' file to make the data available for hooks. - upgrade = self.upgradeneeded(repo) - if upgrade is None: - return + This cache is not going further than what the obsstore is doing, so it does + not have any '.pending' logic. When the obsstore gains proper '.pending' + support, adding it to this cache should not be too hard. As the flag + always moves from 0 to 1, we could have a second '.pending' cache file to + be read. If the flag is set in any of them, the value is 1. For the same + reason, updating the file in place should be possible. + + Implementation note #2: - reset, revs, obsmarkers, obskeypair = upgrade - if reset or self._cachekey is None: - self.clear(reset=True) + Instead of having a large final update run, we could update this cache at + the level of adding a new changeset or new obsmarkers. More on this in the + 'update code'. + + Implementation note #3: - def _updatefrom(repo, revs, obsmarkers) + Storage-wise, we could have a "start rev" to avoid storing useless + zeros. That would be especially useful for the '.pending' overlay.
+ """ + + _filepath = 'cache/evoext-obscache-00' + _headerformat = '>q20sQQ20s' + + emptykey = (node.nullrev, node.nullid, 0, 0, node.nullid) - # update the key from the new data - key = list(self._cachekey) - if revs: - key[0] = len(cl) - 1 - key[1] = cl.node(key[0]) - if obsmarkers: - key[2] += len(obsmarkers) - key[3], key[4] = obskeypair - self._cachekey = tuple(key) + def __init__(self, repo): + super(obscache, self).__init__() + self._ondiskkey = None + self._vfs = repo.vfs + self._data = bytearray() + + def get(self, rev): + """return True if "rev" is used as "precursors for any obsmarkers + + Make sure the cache has been updated to match the repository content before using it""" + return self._data[rev] + + def clear(self, reset=False): + """invalidate the cache content""" + super(obscache, self).clear(reset=reset) + self._data = bytearray() def _updatefrom(self, repo, revs, obsmarkers): if revs: