[querier] introduce RepeatList class, used to optimize size of data returned for result set description
When an rql query has no ambiguity, we used to return the same description * N,
where N is the size of the result set. Returning a RepeatList instance avoids
that multiplication.
According to a quick benchmark, this improves performance for result sets whose
size is roughly > 50, has a very small penalty for result sets < 50, and in any case
reduces the size of data to be transferred over the network through the pyro connection.
--- a/server/session.py Tue Mar 30 10:50:46 2010 +0200
+++ b/server/session.py Tue Mar 30 10:57:42 2010 +0200
@@ -21,7 +21,7 @@
from cubicweb import Binary, UnknownEid, schema
from cubicweb.req import RequestSessionBase
from cubicweb.dbapi import ConnectionProperties
-from cubicweb.utils import make_uid
+from cubicweb.utils import make_uid, RepeatList
from cubicweb.rqlrewrite import RQLRewriter
ETYPE_PYOBJ_MAP[Binary] = 'Bytes'
@@ -829,7 +829,7 @@
selected = rqlst.children[0].selection
solution = rqlst.children[0].solutions[0]
description = _make_description(selected, args, solution)
- return [tuple(description)] * len(result)
+ return RepeatList(len(result), tuple(description))
# hard, delegate the work :o)
return self.manual_build_descr(rqlst, args, result)
@@ -858,7 +858,7 @@
etype = rqlst.children[0].solutions[0]
basedescription.append(term.get_type(etype, args))
if not todetermine:
- return [tuple(basedescription)] * len(result)
+ return RepeatList(len(result), tuple(basedescription))
return self._build_descr(result, basedescription, todetermine)
def _build_descr(self, result, basedescription, todetermine):
--- a/test/unittest_utils.py Tue Mar 30 10:50:46 2010 +0200
+++ b/test/unittest_utils.py Tue Mar 30 10:57:42 2010 +0200
@@ -11,7 +11,7 @@
import datetime
from logilab.common.testlib import TestCase, unittest_main
-from cubicweb.utils import make_uid, UStringIO, SizeConstrainedList
+from cubicweb.utils import make_uid, UStringIO, SizeConstrainedList, RepeatList
try:
import simplejson
@@ -41,6 +41,44 @@
self.assert_(UStringIO())
+class RepeatListTC(TestCase):
+    """Check that RepeatList can stand in for a list of one repeated item."""
+
+    def test_base(self):
+        """Indexing, length and truth value."""
+        rlist = RepeatList(3, (1, 3))
+        self.assertEquals(rlist[0], (1, 3))
+        self.assertEquals(rlist[2], (1, 3))
+        self.assertEquals(rlist[-1], (1, 3))
+        self.assertEquals(len(rlist), 3)
+        # XXX index 4 is past the declared size of 3, yet the item is
+        # still expected back
+        self.assertEquals(rlist[4], (1, 3))
+
+        self.failIf(RepeatList(0, None))
+
+    def test_slice(self):
+        """Slicing gives back plain lists, clipped to the declared size."""
+        item = (1, 3)
+        rlist = RepeatList(3, item)
+        self.assertEquals(rlist[0:1], [item])
+        self.assertEquals(rlist[0:4], [item] * 3)
+        self.assertEquals(rlist[:], [item] * 3)
+
+    def test_iter(self):
+        """Iterating yields the item as many times as the declared size."""
+        expected = [(1, 3)] * 3
+        self.assertEquals(list(RepeatList(3, (1, 3))), expected)
+
+    def test_add(self):
+        """Concatenation with lists and with other RepeatList objects."""
+        rlist = RepeatList(3, (1, 3))
+        three = [(1, 3)] * 3
+        self.assertEquals(rlist + [(1, 4)], three + [(1, 4)])
+        self.assertEquals([(1, 4)] + rlist, [(1, 4)] + three)
+        self.assertEquals(rlist + RepeatList(2, (2, 3)), three + [(2, 3)] * 2)
+
+        # same item on both sides: the sum is expected to be a RepeatList
+        merged = rlist + RepeatList(2, (1, 3))
+        self.assertIsInstance(merged, RepeatList)
+        self.assertEquals(len(merged), 5)
+        self.assertEquals(merged[0], (1, 3))
+
+        merged = rlist + [(1, 3)] * 2
+        self.assertEquals(merged, [(1, 3)] * 5)
+
+
class SizeConstrainedListTC(TestCase):
def test_append(self):
--- a/utils.py Tue Mar 30 10:50:46 2010 +0200
+++ b/utils.py Tue Mar 30 10:57:42 2010 +0200
@@ -12,6 +12,7 @@
import decimal
import datetime
import random
+from itertools import repeat
from uuid import uuid4
from warnings import warn
@@ -101,6 +102,41 @@
__iadd__ = extend
+class RepeatList(object):
+    """fake a list with the same element in each row
+
+    Only the size and the repeated item are stored. Real lists are built
+    on demand: when slicing, when comparing with something which is not a
+    RepeatList, and when concatenating with anything but a RepeatList
+    holding an equal item.
+    """
+    __slots__ = ('_size', '_item')
+
+    def __init__(self, size, item):
+        """`size` is the faked length, `item` the value of every row"""
+        self._size = size
+        self._item = item
+
+    def _aslist(self):
+        """return a real list equivalent to this object"""
+        return [self._item] * self._size
+
+    def __len__(self):
+        return self._size
+
+    def __nonzero__(self):
+        return self._size
+
+    def __iter__(self):
+        return repeat(self._item, self._size)
+
+    def __getitem__(self, index):
+        # slices which don't go through __getslice__ (e.g. extended slices
+        # such as l[::2]) arrive here as slice objects and must give a
+        # list, not the bare item
+        if isinstance(index, slice):
+            return self._aslist()[index]
+        # XXX integer indexes are not range checked: any index gives the
+        # item (the unit tests rely on this)
+        return self._item
+
+    def __getslice__(self, i, j):
+        # XXX could be more efficient, but do we bother?
+        return self._aslist()[i:j]
+
+    def __add__(self, other):
+        """return a RepeatList when `other` is a RepeatList with an equal
+        item, else a list
+        """
+        if isinstance(other, RepeatList):
+            if other._item == self._item:
+                return RepeatList(self._size + other._size, self._item)
+            other = other._aslist()
+        return self._aslist() + other
+
+    def __radd__(self, other):
+        """same as __add__ with `other` as left operand"""
+        if isinstance(other, RepeatList) and other._item == self._item:
+            return RepeatList(self._size + other._size, self._item)
+        return other[:] + self._aslist()
+
+    def __eq__(self, other):
+        if isinstance(other, RepeatList):
+            # compare the slots themselves (`_size` / `_item`): there are
+            # no `size` / `item` attributes on this class
+            if other._size != self._size:
+                return False
+            # two empty lists are equal whatever item they would repeat
+            return not self._size or other._item == self._item
+        return self._aslist() == other
+
+    def __ne__(self, other):
+        # not derived from __eq__ by python 2, hence explicit
+        return not self == other
+
+
class UStringIO(list):
"""a file wrapper which automatically encode unicode string to an encoding
specifed in the constructor