hgext3rd/evolve/obscache.py
changeset 2357 f787f5406a98
parent 2356 d5de0529a48f
child 2359 ff635fa59a25
equal deleted inserted replaced
2356:d5de0529a48f 2357:f787f5406a98
   115         return obsstore._all[firstmarker:]
   115         return obsstore._all[firstmarker:]
   116     else:
   116     else:
   117         obsdata = obsstore.svfs.tryread('obsstore')
   117         obsdata = obsstore.svfs.tryread('obsstore')
   118         return _readmarkers(obsdata, byteoffset)[1]
   118         return _readmarkers(obsdata, byteoffset)[1]
   119 
   119 
   120 class obscache(object):
   120 
   121     """cache the "does a rev" is the precursors of some obsmarkers data
   121 class dualsourcecache(object):
   122 
   122     """An abstract class for cache that needs both changelog and obsstore
   123     This is not directly holding the "is this revision obsolete" information,
   123 
   124     because phases data gets into play here. However, it allow to compute the
   124     This class handle the tracking of changelog and obsstore update. It provide
   125     "obsolescence" set without reading the obsstore content.
   125     data to performs incremental update (see the 'updatefrom' function for
   126 
   126     details).  This class can also detect stripping of the changelog or the
   127     Implementation note #1:
   127     obsstore and can reset the cache in this cache (see the 'clear' function
   128 
   128     for details).
   129       The obsstore is implementing only half of the transaction logic it
       
   130       should. It properly record the starting point of the obsstore to allow
       
   131       clean rollback. However it still write to the obsstore file directly
       
   132       during the transaction. Instead it should be keeping data in memory and
       
   133       write to a '.pending' file to make the data vailable for hooks.
       
   134 
       
   135       This cache is not going futher than what the obstore is doing, so it does
       
   136       not has any '.pending' logic. When the obsstore gains proper '.pending'
       
   137       support, adding it to this cache should not be too hard. As the flag
       
   138       always move from 0 to 1, we could have a second '.pending' cache file to
       
   139       be read. If flag is set in any of them, the value is 1. For the same
       
   140       reason, updating the file in place should be possible.
       
   141 
       
   142     Implementation note #2:
       
   143 
       
   144       Instead of having a large final update run, we could update this cache at
       
   145       the level adding a new changeset or a new obsmarkers. More on this in the
       
   146       'update code'.
       
   147 
       
   148     Implementation note #3:
       
   149 
       
   150         Storage-wise, we could have a "start rev" to avoid storing useless
       
   151         zero. That would be especially useful for the '.pending' overlay.
       
   152     """
   129     """
   153 
   130 
   154     _filepath = 'cache/evoext-obscache-00'
   131     # default key used for an empty cache
   155     _headerformat = '>q20sQQ20s'
   132     #
   156 
   133     # The cache key covering the changesets and obsmarkers content
       
   134     #
       
   135     # The cache key parts are:
       
   136     # - tip-rev,
       
   137     # - tip-node,
       
   138     # - obsstore-length (nb markers),
       
   139     # - obsstore-file-size (in bytes),
       
   140     # - obsstore "cache key"
   157     emptykey = (node.nullrev, node.nullid, 0, 0, node.nullid)
   141     emptykey = (node.nullrev, node.nullid, 0, 0, node.nullid)
   158 
   142 
   159     def __init__(self, repo):
   143     def __init__(self):
   160         self._vfs = repo.vfs
       
   161         # cache key covering the changesets and obsmarkers content
       
   162         #
       
   163         # It contains the following data. Combined with 'upgradeneeded' it allows to
       
   164         # do iterative upgrade for cache depending of theses two pieces of data.
       
   165         #
       
   166         # The cache key parts are"
       
   167         # - tip-rev,
       
   168         # - tip-node,
       
   169         # - obsstore-length (nb markers),
       
   170         # - obsstore-file-size (in bytes),
       
   171         # - obsstore "cache key"
       
   172         self._cachekey = None
   144         self._cachekey = None
   173         self._ondiskkey = None
   145 
   174         self._data = bytearray()
   146     def _updatefrom(self, repo, revs, obsmarkers):
   175 
   147         """override this method to update your cache data incrementally
   176     def get(self, rev):
   148 
   177         """return True if "rev" is used as "precursors for any obsmarkers
   149         revs:      list of new revision in the changelog
   178 
   150         obsmarker: list of new obsmarkers in the obsstore
   179         Make sure the cache has been updated to match the repository content before using it"""
   151         """
   180         return self._data[rev]
   152         raise NotImplementedError
   181 
   153 
   182     def clear(self, reset=False):
   154     def clear(self, reset=False):
   183         """invalidate the cache content"""
   155         """invalidate the cache content
       
   156 
       
   157         if 'reset' is passed, we detected a strip and the cache will have to be
       
   158         recomputed.
       
   159         """
       
   160         # /!\ IMPORTANT /!\
       
   161         # You must overide this method to actually
   184         self._cachekey = self.emptykey if reset else None
   162         self._cachekey = self.emptykey if reset else None
   185         self._data = bytearray()
   163 
       
   164     # Useful public function (no need to override them)
       
   165 
       
   166     def uptodate(self, repo):
       
   167         """return True if the cache content is up to date False otherwise
       
   168 
       
   169         This method can be used to detect of the cache is lagging behind new
       
   170         data in either changelog or obsstore.
       
   171         """
       
   172         if self._cachekey is None:
       
   173             self.load(repo)
       
   174         status = self._checkkey(repo.changelog, repo.obsstore)
       
   175         return (status is not None
       
   176                 and status[0] == self._cachekey[0] # tiprev
       
   177                 and status[1] == self._cachekey[3]) # obssize
       
   178 
       
   179     def update(self, repo):
       
   180         """update the cache with new repository data
       
   181 
       
   182         The update will be incremental when possible"""
       
   183         # If we do not have any data, try loading from disk
       
   184         if self._cachekey is None:
       
   185             self.load(repo)
       
   186 
       
   187         assert repo.filtername is None
       
   188         cl = repo.changelog
       
   189 
       
   190         upgrade = self._upgradeneeded(repo)
       
   191         if upgrade is None:
       
   192             return
       
   193 
       
   194         reset, revs, obsmarkers, obskeypair = upgrade
       
   195         if reset or self._cachekey is None:
       
   196             self.clear(reset=True)
       
   197 
       
   198         self._updatefrom(repo, revs, obsmarkers)
       
   199 
       
   200         # update the key from the new data
       
   201         key = list(self._cachekey)
       
   202         if revs:
       
   203             key[0] = len(cl) - 1
       
   204             key[1] = cl.node(key[0])
       
   205         if obsmarkers:
       
   206             key[2] += len(obsmarkers)
       
   207             key[3], key[4] = obskeypair
       
   208         self._cachekey = tuple(key)
       
   209 
       
   210     # from here, there are internal function only
   186 
   211 
   187     def _checkkey(self, changelog, obsstore):
   212     def _checkkey(self, changelog, obsstore):
   188         """internal function"""
   213         """internal function"""
   189         key = self._cachekey
   214         key = self._cachekey
   190         if key is None:
   215         if key is None:
   201         obssize, obskey = obsstore.cachekey(index=keyobssize)
   226         obssize, obskey = obsstore.cachekey(index=keyobssize)
   202         if obskey != keyobskey:
   227         if obskey != keyobskey:
   203             return None
   228             return None
   204         return tiprev, obssize, obskey
   229         return tiprev, obssize, obskey
   205 
   230 
   206     def uptodate(self, repo):
   231     def _upgradeneeded(self, repo):
   207         if self._cachekey is None:
       
   208             self.load(repo)
       
   209         status = self._checkkey(repo.changelog, repo.obsstore)
       
   210         return (status is not None
       
   211                 and status[0] == self._cachekey[0] # tiprev
       
   212                 and status[1] == self._cachekey[3]) # obssize
       
   213 
       
   214     def upgradeneeded(self, repo):
       
   215         """return (valid, start-rev, start-obs-idx)
   232         """return (valid, start-rev, start-obs-idx)
   216 
   233 
   217         'valid': is "False" if older cache value needs invalidation,
   234         'valid': is "False" if older cache value needs invalidation,
   218 
   235 
   219         'start-rev': first revision not in the cache. None if cache is up to date,
   236         'start-rev': first revision not in the cache. None if cache is up to date,
   266             # requirement (or fix the race, that is not too hard).
   283             # requirement (or fix the race, that is not too hard).
   267             markers = markersfrom(obsstore, keyobssize, keyobslength)
   284             markers = markersfrom(obsstore, keyobssize, keyobslength)
   268 
   285 
   269         return reset, revs, markers, (obssize, obskey)
   286         return reset, revs, markers, (obssize, obskey)
   270 
   287 
   271     def update(self, repo):
   288 
   272         """Iteratively update the cache with new repository data"""
   289 class obscache(dualsourcecache):
   273         # If we do not have any data, try loading from disk
   290     """cache the "does a rev" is the precursors of some obsmarkers data
   274         if self._cachekey is None:
   291 
   275             self.load(repo)
   292     This is not directly holding the "is this revision obsolete" information,
   276 
   293     because phases data gets into play here. However, it allow to compute the
   277         assert repo.filtername is None
   294     "obsolescence" set without reading the obsstore content.
   278         cl = repo.changelog
   295 
   279 
   296     Implementation note #1:
   280         upgrade = self.upgradeneeded(repo)
   297 
   281         if upgrade is None:
   298       The obsstore is implementing only half of the transaction logic it
   282             return
   299       should. It properly record the starting point of the obsstore to allow
   283 
   300       clean rollback. However it still write to the obsstore file directly
   284         reset, revs, obsmarkers, obskeypair = upgrade
   301       during the transaction. Instead it should be keeping data in memory and
   285         if reset or self._cachekey is None:
   302       write to a '.pending' file to make the data vailable for hooks.
   286             self.clear(reset=True)
   303 
   287 
   304       This cache is not going futher than what the obstore is doing, so it does
   288         def _updatefrom(repo, revs, obsmarkers)
   305       not has any '.pending' logic. When the obsstore gains proper '.pending'
   289 
   306       support, adding it to this cache should not be too hard. As the flag
   290         # update the key from the new data
   307       always move from 0 to 1, we could have a second '.pending' cache file to
   291         key = list(self._cachekey)
   308       be read. If flag is set in any of them, the value is 1. For the same
   292         if revs:
   309       reason, updating the file in place should be possible.
   293             key[0] = len(cl) - 1
   310 
   294             key[1] = cl.node(key[0])
   311     Implementation note #2:
   295         if obsmarkers:
   312 
   296             key[2] += len(obsmarkers)
   313       Instead of having a large final update run, we could update this cache at
   297             key[3], key[4] = obskeypair
   314       the level adding a new changeset or a new obsmarkers. More on this in the
   298         self._cachekey = tuple(key)
   315       'update code'.
       
   316 
       
   317     Implementation note #3:
       
   318 
       
   319         Storage-wise, we could have a "start rev" to avoid storing useless
       
   320         zero. That would be especially useful for the '.pending' overlay.
       
   321     """
       
   322 
       
   323     _filepath = 'cache/evoext-obscache-00'
       
   324     _headerformat = '>q20sQQ20s'
       
   325 
       
   326     emptykey = (node.nullrev, node.nullid, 0, 0, node.nullid)
       
   327 
       
   328     def __init__(self, repo):
       
   329         super(obscache, self).__init__()
       
   330         self._ondiskkey = None
       
   331         self._vfs = repo.vfs
       
   332         self._data = bytearray()
       
   333 
       
   334     def get(self, rev):
       
   335         """return True if "rev" is used as "precursors for any obsmarkers
       
   336 
       
   337         Make sure the cache has been updated to match the repository content before using it"""
       
   338         return self._data[rev]
       
   339 
       
   340     def clear(self, reset=False):
       
   341         """invalidate the cache content"""
       
   342         super(obscache, self).clear(reset=reset)
       
   343         self._data = bytearray()
   299 
   344 
   300     def _updatefrom(self, repo, revs, obsmarkers):
   345     def _updatefrom(self, repo, revs, obsmarkers):
   301         if revs:
   346         if revs:
   302             self._updaterevs(repo, revs)
   347             self._updaterevs(repo, revs)
   303         if obsmarkers:
   348         if obsmarkers: