--- a/hgext3rd/evolve/obscache.py Mon May 01 08:07:05 2017 +0200
+++ b/hgext3rd/evolve/obscache.py Mon May 01 08:13:24 2017 +0200
@@ -7,6 +7,18 @@
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
+import hashlib
+import struct
+import weakref
+import errno
+
+from mercurial import (
+ localrepo,
+ obsolete,
+ phases,
+ node,
+)
+
from . import (
exthelper,
)
@@ -27,6 +39,7 @@
@eh.wrapfunction(obsstorefilecache, 'func')
def obsstorewithcache(orig, repo):
obsstore = orig(repo)
+ obsstore.obscache = obscache(repo.unfiltered())
class cachekeyobsstore(obsstore.__class__):
@@ -138,3 +151,171 @@
startidx = keyobslength
return True, startrev, startidx
+
+class obscache(object):
+ """cache the "does a rev" is the precursors of some obsmarkers data
+
+ This is not directly holding the "is this revision obsolete" information,
+    because phases data gets into play here. However, it allows computing the
+ "obsolescence" set without reading the obsstore content.
+
+ Implementation note #1:
+
+    The obsstore is implementing only half of the transaction logic it
+    should. It properly records the starting point of the obsstore to allow
+    clean rollback. However it still writes to the obsstore file directly
+    during the transaction. Instead it should be keeping data in memory and
+    write to a '.pending' file to make the data available for hooks.
+
+    This cache is not going further than what the obsstore is doing, so it
+    does not have any '.pending' logic. When the obsstore gains proper
+    '.pending' support, adding it to this cache should not be too hard. As
+    the flag always moves from 0 to 1, we could have a second '.pending'
+    cache file to be read. If the flag is set in either of them, the value is
+    1. For the same reason, updating the file in place should be possible.
+
+ Implementation note #2:
+
+    Instead of having a large final update run, we could update this cache at
+    the level of adding a new changeset or a new obsmarker. More on this in
+    the 'update code'.
+
+ Implementation note #3:
+
+ Storage-wise, we could have a "start rev" to avoid storing useless
+ zero. That would be especially useful for the '.pending' overlay.
+ """
+
+ _filepath = 'cache/evoext-obscache-00'
+ _headerformat = '>I20sQQ20s'
+
+ def __init__(self, repo):
+ self._vfs = repo.vfs
+        # The cache key parts are:
+ # - tip-rev,
+ # - tip-node,
+ # - obsstore-length (nb markers),
+ # - obsstore-file-size (in bytes),
+ # - obsstore "cache key"
+ self._cachekey = None
+ self._data = bytearray()
+
+ def get(self, rev):
+ """return True if "rev" is used as "precursors for any obsmarkers
+
+ Make sure the cache has been updated to match the repository content before using it"""
+ return self._data[rev]
+
+ def clear(self):
+ """invalidate the cache content"""
+ self._cachekey = None
+ self._data = bytearray()
+
+ def update(self, repo):
+ """Iteratively update the cache with new repository data"""
+ # If we do not have any data, try loading from disk
+ if self._cachekey is None:
+ self.load(repo)
+
+ valid, startrev, startidx = upgradeneeded(repo, self._cachekey)
+ if not valid:
+ self.clear()
+
+ if startrev is None and startidx is None:
+ return
+
+ # process the new changesets
+ cl = repo.changelog
+ if startrev is not None:
+ node = cl.node
+ # Note:
+ #
+ # Newly added changeset might be affected by obsolescence markers
+            # we already have locally. So we need to have some global
+            # knowledge about the markers to handle that question. Right now this
+ # requires parsing all markers in the obsstore. However, we could
+ # imagine using various optimisation (eg: bloom filter, other on
+ # disk cache) to remove this full parsing.
+ #
+            # For now we stick to the simpler approach of paying the
+ # performance cost on new changesets.
+ succs = repo.obsstore.successors
+ for r in cl.revs(startrev):
+ if node(r) in succs:
+ val = 1
+ else:
+ val = 0
+ self._data.append(val)
+ assert len(self._data) == len(cl), (len(self._data), len(cl))
+
+ # process the new obsmarkers
+ if startidx is not None:
+ rev = cl.nodemap.get
+ markers = repo.obsstore._all
+ # Note:
+ #
+            # There is actually no need to load the full obsstore here,
+ # since we only read the latest ones. We do it for simplicity in
+ # the first implementation. Loading the full obsstore has a
+ # performance cost and should go away in this case too. We have
+            # two simple options for that:
+ #
+ # 1) provide and API to start reading markers from a byte offset
+ # (we have that data in the cache key)
+ #
+ # 2) directly update the cache at a lower level, in the code
+ # responsible for adding a markers.
+ #
+ # Option 2 is probably a bit more invasive, but more solid on the long run
+
+ for i in xrange(startidx, len(repo.obsstore)):
+ r = rev(markers[i][0])
+                # If a marker affects a newly added node, it would have been
+                # caught in the previous loop, so we only update revs < startrev
+ if r is not None and (startrev is None or r < startrev):
+ self._data[r] = 1
+
+        # XXX note that there is a race condition here, since the repo "might"
+        # have changed since the cache update above. However, this code will
+ # mostly be running in a lock so we ignore the issue for now.
+ #
+ # To work around this, 'upgradeneeded' should return a bounded amount
+ # of changeset and markers to read with their associated cachekey. see
+ # 'upgradeneeded' for detail.
+ self._cachekey = getcachekey(repo)
+
+ def save(self, repo):
+ """save the data to disk"""
+
+        # XXX it happens that the obsstore is (buggily) always up to date on disk
+ if self._cachekey is None:
+ return
+
+ with repo.vfs(self._filepath, 'w', atomictemp=True) as cachefile:
+ headerdata = struct.pack(self._headerformat, *self._cachekey)
+ cachefile.write(headerdata)
+ cachefile.write(self._data)
+
+ def load(self, repo):
+ """load data from disk"""
+ assert repo.filtername is None
+
+ data = repo.vfs.tryread(self._filepath)
+ if not data:
+ return
+
+ headersize = struct.calcsize(self._headerformat)
+ self._cachekey = struct.unpack(self._headerformat, data[:headersize])
+ self._data = bytearray(data[headersize:])
+
+@eh.reposetup
+def setupcache(ui, repo):
+
+ class obscacherepo(repo.__class__):
+
+ @localrepo.unfilteredmethod
+ def destroyed(self):
+ if 'obsstore' in vars(self):
+ self.obsstore.obscache.clear()
+
+ repo.__class__ = obscacherepo