--- a/hgext3rd/pullbundle.py Tue Sep 25 13:44:32 2018 +0200
+++ b/hgext3rd/pullbundle.py Tue Sep 25 18:23:46 2018 +0200
@@ -72,16 +72,21 @@
extensions next to it. As soon as stable range have been upstreamed, we won't
need the dependency to the evolve extension anymore.
"""
+
+import collections
import errno
+import random
import os
from mercurial import (
changegroup,
discovery,
+ error,
exchange,
narrowspec,
node as nodemod,
registrar,
+ scmutil,
util,
)
@@ -92,6 +97,9 @@
# minimumhgversion = ''
buglink = 'https://bz.mercurial-scm.org/'
+# Command table of this extension. ``command`` is used further down as a
+# decorator (see ``debugpullbundlecacheoverlap``) -- presumably each decorated
+# function gets registered into ``cmdtable``; confirm against ``registrar``.
+cmdtable = {}
+command = registrar.command(cmdtable)
+
configtable = {}
configitem = registrar.configitem(configtable)
@@ -437,3 +445,114 @@
pversion = version
partdata = (cachedata, nbchanges, pversion)
return _makepartfromstream(newpart, repo, *partdata)
+
+@command('^debugpullbundlecacheoverlap',
+         [('', 'count', 100, _('of "client" pulling')),
+         ],
+         _('hg debugpullbundlecacheoverlap [--count 100] REVSET'))
+def debugpullbundlecacheoverlap(ui, repo, *revs, **opts):
+    '''Display statistics on bundle cache hits
+
+    This command simulates pulls from multiple "clients", each using a random
+    subset of the revisions defined by REVSET, and displays statistics about
+    the overlap in the bundles necessary to serve them.
+
+    Use --count to choose how many pulls are simulated.
+    '''
+    actionrevs = scmutil.revrange(repo, revs)
+    # Test the resolved revisions rather than the raw arguments: a REVSET
+    # matching nothing leaves no revision to sample from either.
+    if not actionrevs:
+        raise error.Abort('No revision selected')
+    count = opts['count']
+    if count <= 0:
+        # Without at least one simulated pull there is nothing to report
+        # (the distributions and the cache percentage would be undefined).
+        raise error.Abort('--count must be a positive number')
+
+    def ratio(part, whole):
+        # An empty pull has nothing cached; report 0.0 rather than dividing
+        # by zero.
+        if not whole:
+            return 0.0
+        return part / float(whole)
+
+    # rangeid -> number of simulated pulls that needed this range so far
+    bundlehits = collections.defaultdict(int)
+    # one (pull size, changesets, cached changesets, ranges, cached ranges)
+    # tuple per simulated pull
+    pullstats = []
+
+    rlen = lambda rangeid: repo.stablerange.rangelength(repo, rangeid)
+
+    repo.ui.write("gathering %d sample pulls within %d revisions\n"
+                  % (count, len(actionrevs)))
+    for i in xrange(count):
+        repo.ui.progress('gathering data', i, total=count)
+        outgoing = takeonesample(repo, actionrevs)
+        ranges = sliceoutgoing(repo, outgoing)
+        hitranges = 0
+        hitchanges = 0
+        totalchanges = 0
+        for rangeid, __ in ranges:
+            length = rlen(rangeid)
+            totalchanges += length
+            # A range already requested by an earlier simulated pull counts
+            # as a cache hit for this one.
+            if bundlehits[rangeid]:
+                hitranges += 1
+                hitchanges += length
+            bundlehits[rangeid] += 1
+        stats = (len(outgoing.missing),
+                 totalchanges,
+                 hitchanges,
+                 len(ranges),
+                 hitranges,
+                 )
+        pullstats.append(stats)
+    repo.ui.progress('gathering data', None)
+
+    sizes = []
+    changesmissing = []
+    totalchanges = 0
+    totalcached = 0
+    changesratio = []
+    rangesratio = []
+    bundlecount = []
+    for size, changes, cachedchanges, bundles, cachedbundles in pullstats:
+        sizes.append(size)
+        changesmissing.append(changes - cachedchanges)
+        changesratio.append(ratio(cachedchanges, changes))
+        rangesratio.append(ratio(cachedbundles, bundles))
+        bundlecount.append(bundles)
+        totalchanges += changes
+        totalcached += cachedchanges
+
+    sizesdist = distribution(sizes)
+    repo.ui.write(fmtdist('pull size', sizesdist))
+    changesmissingdist = distribution(changesmissing)
+    repo.ui.write(fmtdist('non-cached changesets', changesmissingdist))
+    changesratiodist = distribution(changesratio)
+    repo.ui.write(fmtdist('ratio of cached changesets', changesratiodist))
+    bundlecountdist = distribution(bundlecount)
+    repo.ui.write(fmtdist('bundle count', bundlecountdist))
+    rangesratiodist = distribution(rangesratio)
+    repo.ui.write(fmtdist('ratio of cached bundles', rangesratiodist))
+
+    # Guard the percentage as well: every simulated pull may have been empty.
+    if totalchanges:
+        cachedpercent = totalcached * 100 // totalchanges
+    else:
+        cachedpercent = 0
+    repo.ui.write('changesets served:\n')
+    repo.ui.write(' total: %7d\n' % totalchanges)
+    repo.ui.write(' from cache: %7d (%2d%%)\n'
+                  % (totalcached, cachedpercent))
+    repo.ui.write(' bundle: %7d\n' % sum(bundlecount))
+
+def takeonesample(repo, revs):
+    """Build the outgoing set of one simulated pull over ``revs``.
+
+    A random sample of ``revs`` is drawn: 0.1% of them, but at least 4 and
+    never more than ``len(revs)``. The sample is expanded with the
+    ``%ld::%ld`` revset, the resulting revisions are converted to nodes and
+    handed to ``outgoingfromnodes``, whose result is returned.
+    """
+    node = repo.changelog.node
+    # random.sample() raises ValueError when asked for more items than the
+    # population holds, so cap the sample size for small revision sets.
+    samplesize = min(len(revs), max(4, len(revs) // 1000))
+    pulled = random.sample(revs, samplesize)
+    pulled = repo.revs('%ld::%ld', pulled, pulled)
+    nodes = [node(r) for r in pulled]
+    return outgoingfromnodes(repo, nodes)
+
+def distribution(data):
+    """Summarize ``data`` as a dict of order statistics.
+
+    The returned dict maps 'min', '10%', '25%', '50%', '75%', '90%' and 'max'
+    to the value found at the matching position of the sorted data. The input
+    is left untouched (a sorted copy is used).
+
+    Raises IndexError when ``data`` is empty.
+    """
+    ordered = sorted(data)
+    length = len(ordered)
+    if not length:
+        raise IndexError('cannot compute the distribution of empty data')
+    # Multiply before dividing: ``(length // 10) * 9`` collapses to index 0
+    # (the minimum) for fewer than 10 samples. ``length * k // n`` with
+    # k < n always stays below ``length``, so the index is always valid.
+    return {
+        'min': ordered[0],
+        '10%': ordered[length // 10],
+        '25%': ordered[length // 4],
+        '50%': ordered[length // 2],
+        '75%': ordered[length * 3 // 4],
+        '90%': ordered[length * 9 // 10],
+        'max': ordered[-1],
+    }
+
+# Text template filled in by fmtdist(): a "{name}:" header line followed by
+# one line per distribution key ('min', '10%', '25%', '50%', '75%', '90%',
+# 'max'), i.e. the keys of the dict built by distribution().
+STATSFORMAT = """{name}:
+ min: {min}
+ 10%: {10%}
+ 25%: {25%}
+ 50%: {50%}
+ 75%: {75%}
+ 90%: {90%}
+ max: {max}
+"""
+
+def fmtdist(name, data):
+    """Render one distribution as text through the ``STATSFORMAT`` template.
+
+    ``name`` fills the ``{name}`` header field; the entries of the ``data``
+    mapping are passed as the remaining template fields ('min', '10%', ...,
+    'max'). Returns the formatted string.
+    """
+    fields = dict(data)
+    report = STATSFORMAT.format(name=name, **fields)
+    return report