[querier] introduce RepeatList class, used to optimize size of data returned for result set description stable
author: Sylvain Thénault <sylvain.thenault@logilab.fr>
date: Tue, 30 Mar 2010 10:57:42 +0200
branch: stable
changeset: 5069 135c5d7b89d0
parent: 5068 10c3422d7419
child: 5070 b1f80ccadda3
[querier] introduce RepeatList class, used to optimize size of data returned for result set description. When an rql query has no ambiguity, we used to return the same description * N, where N is the size of the result set. Returning a RepeatList instance avoids that multiplication. According to a quick benchmark, this improves performance for result sets whose size is roughly > 50, has a very small penalty for result sets < 50, and in any case reduces the size of the data to be transferred over the network through a pyro connection.
server/session.py
test/unittest_utils.py
utils.py
--- a/server/session.py	Tue Mar 30 10:50:46 2010 +0200
+++ b/server/session.py	Tue Mar 30 10:57:42 2010 +0200
@@ -21,7 +21,7 @@
 from cubicweb import Binary, UnknownEid, schema
 from cubicweb.req import RequestSessionBase
 from cubicweb.dbapi import ConnectionProperties
-from cubicweb.utils import make_uid
+from cubicweb.utils import make_uid, RepeatList
 from cubicweb.rqlrewrite import RQLRewriter
 
 ETYPE_PYOBJ_MAP[Binary] = 'Bytes'
@@ -829,7 +829,7 @@
             selected = rqlst.children[0].selection
             solution = rqlst.children[0].solutions[0]
             description = _make_description(selected, args, solution)
-            return [tuple(description)] * len(result)
+            return RepeatList(len(result), tuple(description))
         # hard, delegate the work :o)
         return self.manual_build_descr(rqlst, args, result)
 
@@ -858,7 +858,7 @@
                 etype = rqlst.children[0].solutions[0]
                 basedescription.append(term.get_type(etype, args))
         if not todetermine:
-            return [tuple(basedescription)] * len(result)
+            return RepeatList(len(result), tuple(basedescription))
         return self._build_descr(result, basedescription, todetermine)
 
     def _build_descr(self, result, basedescription, todetermine):
--- a/test/unittest_utils.py	Tue Mar 30 10:50:46 2010 +0200
+++ b/test/unittest_utils.py	Tue Mar 30 10:57:42 2010 +0200
@@ -11,7 +11,7 @@
 import datetime
 
 from logilab.common.testlib import TestCase, unittest_main
-from cubicweb.utils import make_uid, UStringIO, SizeConstrainedList
+from cubicweb.utils import make_uid, UStringIO, SizeConstrainedList, RepeatList
 
 try:
     import simplejson
@@ -41,6 +41,44 @@
         self.assert_(UStringIO())
 
 
+class RepeatListTC(TestCase):
+    """check that RepeatList(size, item) behaves like the list [item] * size"""
+    def test_base(self):
+        l = RepeatList(3, (1, 3))
+        self.assertEquals(l[0], (1, 3))
+        self.assertEquals(l[2], (1, 3))
+        self.assertEquals(l[-1], (1, 3))
+        self.assertEquals(len(l), 3)
+        # XXX index out of range: no IndexError, the item is returned anyway
+        self.assertEquals(l[4], (1, 3))
+        # an empty RepeatList is false
+        self.failIf(RepeatList(0, None))
+
+    def test_slice(self):
+        l = RepeatList(3, (1, 3))
+        self.assertEquals(l[0:1], [(1, 3)])
+        self.assertEquals(l[0:4], [(1, 3)]*3)
+        self.assertEquals(l[:], [(1, 3)]*3)
+
+    def test_iter(self):
+        self.assertEquals(list(RepeatList(3, (1, 3))),
+                          [(1, 3)]*3)
+
+    def test_add(self):
+        l = RepeatList(3, (1, 3))
+        self.assertEquals(l + [(1, 4)], [(1, 3)]*3  + [(1, 4)])
+        self.assertEquals([(1, 4)] + l, [(1, 4)] + [(1, 3)]*3)
+        self.assertEquals(l + RepeatList(2, (2, 3)), [(1, 3)]*3 + [(2, 3)]*2)
+        # two RepeatLists holding equal items are merged into a new RepeatList
+        x = l + RepeatList(2, (1, 3))
+        self.assertIsInstance(x, RepeatList)
+        self.assertEquals(len(x), 5)
+        self.assertEquals(x[0], (1, 3))
+        # concatenation with a plain list holding the same item
+        x = l + [(1, 3)] * 2
+        self.assertEquals(x, [(1, 3)] * 5)
+
+
 class SizeConstrainedListTC(TestCase):
 
     def test_append(self):
--- a/utils.py	Tue Mar 30 10:50:46 2010 +0200
+++ b/utils.py	Tue Mar 30 10:57:42 2010 +0200
@@ -12,6 +12,7 @@
 import decimal
 import datetime
 import random
+from itertools import repeat
 from uuid import uuid4
 from warnings import warn
 
@@ -101,6 +102,41 @@
     __iadd__ = extend
 
 
+class RepeatList(object):
+    """fake a read-only list of `size` rows which all hold the same `item`
+    (only the size and the item are stored, not a real `[item] * size` list)"""
+    __slots__ = ('_size', '_item')
+    def __init__(self, size, item):
+        self._size = size
+        self._item = item
+    def __len__(self):
+        return self._size
+    def __nonzero__(self):
+        return self._size
+    def __iter__(self):
+        return repeat(self._item, self._size)
+    def __getitem__(self, index):
+        # the index is not checked: any index, even out of range, gives the item
+        return self._item
+    def __getslice__(self, i, j):
+        # XXX could be more efficient, but do we bother?
+        return ([self._item] * self._size)[i:j]
+    def __add__(self, other):
+        if isinstance(other, RepeatList):
+            if other._item == self._item:
+                return RepeatList(self._size + other._size, self._item)
+            return ([self._item] * self._size) + other[:]
+        return ([self._item] * self._size) + other
+    def __radd__(self, other):
+        if isinstance(other, RepeatList) and other._item == self._item:
+            return RepeatList(self._size + other._size, self._item)
+        return other[:] + ([self._item] * self._size)
+    def __eq__(self, other):
+        if isinstance(other, RepeatList):
+            return other._size == self._size and other._item == self._item
+        return self[:] == other
+
+
 class UStringIO(list):
     """a file wrapper which automatically encode unicode string to an encoding
     specifed in the constructor