directaccess: make sure gethashsymbols does not return rev numbers
authorJun Wu <quark@fb.com>
Wed, 22 Jun 2016 18:22:17 +0100
changeset 1714 4565b046b850
parent 1713 00612a019547
child 1716 1650f7dd2048
child 1719 6ff0454f1f23
directaccess: make sure gethashsymbols does not return rev numbers With "hg log -r 1 -r 2 -r 3", gethashsymbols will get the following tree: ('func', ('symbol', '_list'), ('string', '1\x002\x003')) Before this patch, it would return ['1', '2', '3'], which are revision numbers and cause unnecessary (expensive) partialmatch lookups. This patch refactors gethashsymbols a bit, making sure the returned values are always filtered.
hgext/directaccess.py
tests/test-inhibit.t
--- a/hgext/directaccess.py	Wed Jun 15 15:19:38 2016 -0700
+++ b/hgext/directaccess.py	Wed Jun 22 18:22:17 2016 +0100
@@ -132,38 +132,35 @@
 
 _listtuple = ('symbol', '_list')
 
+def _ishashsymbol(symbol, maxrev):
+    # Returns true if symbol looks like a hash
+    try:
+        n = int(symbol)
+        if n <= maxrev:
+            # It's a rev number
+            return False
+    except ValueError:
+        pass
+    return hashre.match(symbol)
+
 def gethashsymbols(tree, maxrev):
     # Returns the list of symbols of the tree that look like hashes
     # for example for the revset 3::abe3ff it will return ('abe3ff')
     if not tree:
         return []
 
+    results = []
     if len(tree) == 2 and tree[0] == "symbol":
-        try:
-            n = int(tree[1])
-            # This isn't necessarily a rev number, could be a hash prefix
-            if n > maxrev:
-                return [tree[1]]
-            else:
-                return []
-        except ValueError as e:
-            if hashre.match(tree[1]):
-                return [tree[1]]
-            return []
+        results.append(tree[1])
     elif tree[0] == "func" and tree[1] == _listtuple:
         # the optimiser will group sequence of hash request
-        result = []
-        for entry in tree[2][1].split('\0'):
-            if hashre.match(entry):
-                result.append(entry)
-        return result
+        results += tree[2][1].split('\0')
     elif len(tree) >= 3:
-        results = []
         for subtree in tree[1:]:
             results += gethashsymbols(subtree, maxrev)
+        # return directly, we don't need to filter symbols again
         return results
-    else:
-        return []
+    return [s for s in results if _ishashsymbol(s, maxrev)]
 
 def _posttreebuilthook(orig, tree, repo):
     # This is use to enabled direct hash access
--- a/tests/test-inhibit.t	Wed Jun 15 15:19:38 2016 -0700
+++ b/tests/test-inhibit.t	Wed Jun 22 18:22:17 2016 +0100
@@ -375,6 +375,21 @@
   cf5c4f4554ce
   2db36d8066ff
 
+Test directaccess only takes hashes
+
+  $ HOOKPATH=$TESTTMP/printexplicitaccess.py
+  $ cat >> $HOOKPATH <<EOF
+  > def hook(ui, repo, **kwds):
+  >     for i in sorted(repo._explicitaccess):
+  >         ui.write('directaccess: %s\n' % i)
+  > EOF
+
+  $ hg log -r 1 -r 2 -r 2db36d8066f -T '{rev}\n' --config hooks.post-log=python:$HOOKPATH:hook
+  1
+  2
+  3
+  directaccess: 3
+
 With severals hidden sha, rebase of one hidden stack onto another one:
   $ hg update -C 0
   0 files updated, 0 files merged, 4 files removed, 0 files unresolved