--- a/hgext3rd/evolve/obscache.py Thu May 04 21:21:59 2017 +0200
+++ b/hgext3rd/evolve/obscache.py Thu May 04 21:24:02 2017 +0200
@@ -13,6 +13,7 @@
import errno
from mercurial import (
+ error,
localrepo,
obsolete,
phases,
@@ -20,6 +21,8 @@
util,
)
+from mercurial.i18n import _
+
from . import (
exthelper,
)
@@ -160,6 +163,45 @@
return True, startrev, startidx
+
+# XXX copied as-is from Mercurial 4.2, with an extra "offset" parameter added
+@util.nogc
+def _readmarkers(data, offset=None):
+ """Read and enumerate markers from raw data"""
+ off = 0
+ diskversion = struct.unpack('>B', data[off:off + 1])[0]
+ if offset is None:
+ off += 1
+ else:
+ assert 1 <= offset
+ off = offset
+ if diskversion not in obsolete.formats:
+ raise error.Abort(_('parsing obsolete marker: unknown version %r')
+ % diskversion)
+ return diskversion, obsolete.formats[diskversion][0](data, off)
+
+def markersfrom(obsstore, byteoffset, firstmarker):
+    """return all markers from the obsstore, starting at 'firstmarker'
+
+    'byteoffset' must be the offset of 'firstmarker' in the raw obsstore
+    file, so that the already-known markers can be skipped without being
+    parsed.
+    """
+    if '_all' in vars(obsstore):
+        # if the data are already in memory, just use them
+        return obsstore._all[firstmarker:]
+    else:
+        obsdata = obsstore.svfs.tryread('obsstore')
+        return _readmarkers(obsdata, byteoffset)[1]
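+
+# Hypothetical sanity check (illustration only, nothing calls it): the
+# offset-based fast path above should always return the same markers as
+# slicing the fully parsed list, whatever obsstore format is in use.
+def _checkmarkersfrom(obsstore, byteoffset, firstmarker):
+    """compare 'markersfrom' against a full parse (debugging aid)"""
+    fast = list(markersfrom(obsstore, byteoffset, firstmarker))
+    slow = list(obsstore._all)[firstmarker:]
+    assert fast == slow, (len(fast), len(slow))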
+
class obscache(object):
"""cache the "does a rev" is the precursors of some obsmarkers data
@@ -239,66 +281,92 @@
if startrev is None and startidx is None:
return
+        # check that we never run 'update' without holding a lock
+        #
+        # There is a potential race condition otherwise, since the repo
+        # "might" have changed since the 'upgradeneeded' check above. But
+        # this code only runs while holding a lock, so we ignore it for now.
+        #
+        # To lift this limitation, 'upgradeneeded' should return a bounded
+        # amount of changesets and markers to read, along with their
+        # associated cachekey. See 'upgradeneeded' for details.
+ assert repo._currentlock(repo._lockref) is not None
+
# process the new changesets
cl = repo.changelog
if startrev is not None:
- node = cl.node
- # Note:
- #
- # Newly added changeset might be affected by obsolescence markers
- # we already have locally. So we needs to have soem global
- # knowledge about the markers to handle that question. Right this
- # requires parsing all markers in the obsstore. However, we could
- # imagine using various optimisation (eg: bloom filter, other on
- # disk cache) to remove this full parsing.
- #
- # For now we stick to the simpler approach or paying the
- # performance cost on new changesets.
- succs = repo.obsstore.successors
- for r in cl.revs(startrev):
- if node(r) in succs:
- val = 1
- else:
- val = 0
- self._data.append(val)
+ self._updaterevs(repo, cl.revs(startrev))
assert len(self._data) == len(cl), (len(self._data), len(cl))
# process the new obsmarkers
if startidx is not None:
- rev = cl.nodemap.get
- markers = repo.obsstore._all
- # Note:
- #
- # There are no actually needs to load the full obsstore here,
- # since we only read the latest ones. We do it for simplicity in
- # the first implementation. Loading the full obsstore has a
- # performance cost and should go away in this case too. We have
- # two simples options for that:
- #
- # 1) provide and API to start reading markers from a byte offset
- # (we have that data in the cache key)
- #
- # 2) directly update the cache at a lower level, in the code
- # responsible for adding a markers.
- #
- # Option 2 is probably a bit more invasive, but more solid on the long run
+            if startidx == 0:  # all markers
+                markers = repo.obsstore._all
+            else:
+                # the cached obsstore size ('_cachekey[3]') is the byte
+                # offset at which the new, unprocessed markers start
+                markers = markersfrom(repo.obsstore, self._cachekey[3], startidx)
+ self._updatemarkers(repo, markers)
+
+ self._cachekey = getcachekey(repo)
+
+    def _updaterevs(self, repo, revs):
+        """update the cache with new revisions
+
+        Newly added changesets might be affected by obsolescence markers
+        we already have locally, so we need some global knowledge about
+        the markers to answer that question.
+
+        Right now this requires parsing all markers in the obsstore. We
+        could imagine using various optimisations (e.g. another cache,
+        network exchange, etc.).
- for i in xrange(startidx, len(repo.obsstore)):
- r = rev(markers[i][0])
- # If markers affect a newly added nodes, it would have been
- # caught in the previous loop, (so we skip < startrev)
- if r is not None and (startrev is None or r < startrev):
- self._data[r] = 1
+        A possible approach is to build a set of all nodes used as
+        precursors in `obsstore._obscandidate` (see the '_scanprecursors'
+        sketch below). If markers are not loaded yet, a quick scan through
+        the raw obsstore data could fill a (pre-sized) set much faster
+        than parsing all the obsmarkers: less data accessed, no objects
+        created besides the nodes, and no complex data to decode.
- assert repo._currentlock(repo._lockref) is not None
- # XXX note that there are a potential race condition here, since the
- # repo "might" have changed side the cache update above. However, this
- # code will only be running in a lock so we ignore the issue for now.
- #
- # To work around this, 'upgradeneeded' should return a bounded amount
- # of changeset and markers to read with their associated cachekey. see
- # 'upgradeneeded' for detail.
- self._cachekey = getcachekey(repo)
+ For now we stick to the simpler approach of paying the
+ performance cost on new changesets.
+ """
+ node = repo.changelog.node
+ succs = repo.obsstore.successors
+ for r in revs:
+ if node(r) in succs:
+ val = 1
+ else:
+ val = 0
+ self._data.append(val)
+
+ def _updatemarkers(self, repo, obsmarkers):
+ """update the cache with new markers"""
+ rev = repo.changelog.nodemap.get
+ for m in obsmarkers:
+ r = rev(m[0])
+ if r is not None:
+ self._data[r] = 1
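+
+    # Illustrative sketch of the quick scan described in '_updaterevs'.
+    # Nothing calls it yet and the layout is an assumption: it relies on
+    # the version 1 on-disk format, where each marker starts with a
+    # '>IdhHBBB20s' fixed header (see 'mercurial.obsolete._fm1fixed')
+    # whose leading uint32 is the full marker size and whose trailing
+    # 20 bytes are the precursor node.
+    @staticmethod
+    def _scanprecursors(data, off=1):
+        """gather all precursor nodes without decoding full markers
+
+        'off' defaults to 1 to skip the one-byte version header.
+        """
+        fixed = struct.Struct('>IdhHBBB20s')
+        candidates = set()
+        while off + fixed.size <= len(data):
+            header = fixed.unpack_from(data, off)
+            candidates.add(header[-1])  # the precursor node
+            off += header[0]            # jump over the whole marker
+        return candidates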
def save(self, repo):
"""save the data to disk"""
@@ -339,7 +407,7 @@
if notpublic:
obscache = repo.obsstore.obscache
# Since we warm the cache at the end of every transaction, the cache
- # should be up to date. However a non-enabled client might have touced
+ # should be up to date. However a non-enabled client might have touched
# the repository.
#
# Updating the cache without a lock is sloppy, so we fallback to the
@@ -348,7 +416,7 @@
#
# With the current implementation updating the cache will requires to
# load the obsstore anyway. Once loaded, hitting the obsstore directly
- # will be about as fast..
+ # will be about as fast...
if not obscache.uptodate(repo):
if repo.currenttransaction() is None:
repo.ui.log('evoext-obscache',
@@ -357,8 +425,9 @@
repo.ui.debug('obscache is out of date')
return orig(repo)
else:
- # If a transaction is open, it is worthwhile to update and use the
- # cache as it will be written on disk when the transaction close.
+            # If a transaction is open, it is worthwhile to update and use
+            # the cache: the lock prevents races and the cache will be
+            # written to disk when the transaction closes.
obscache.update(repo)
isobs = obscache.get
for r in notpublic:
@@ -391,11 +460,10 @@
if repo is None:
return
repo = repo.unfiltered()
- # As pointed in 'obscache.update', we could have the
- # changelog and the obsstore in charge of updating the
- # cache when new items goes it. The tranaction logic would
- # then only be involved for the 'pending' and final saving
- # logic.
+        # As pointed out in 'obscache.update', we could have the changelog
+        # and the obsstore in charge of updating the cache when new
+        # items go in. The transaction logic would then only be
+        # involved for the 'pending' and final writing to disk.
self.obsstore.obscache.update(repo)
self.obsstore.obscache.save(repo)