# HG changeset patch # User Sylvain Thénault # Date 1269939462 -7200 # Node ID 135c5d7b89d0f82a87ff4f4fe466ad85ef4e26cd # Parent 10c3422d74197726229d2bad16d96a2c6f4fcaac [querier] introduce RepeatList class, used to optimize size of data returned for result set description When an rql query has no ambiguity, we used to return the same description * N where N is the size of the result set. Returning a RepeatList instance avoids that multiplication. According to a quick benchmark this improves performance for result sets whose size is ~ > 50, has a very small penalty for rsets < 50, and in any case reduces the size of data to be transferred over the network through a pyro connection. diff -r 10c3422d7419 -r 135c5d7b89d0 server/session.py --- a/server/session.py Tue Mar 30 10:50:46 2010 +0200 +++ b/server/session.py Tue Mar 30 10:57:42 2010 +0200 @@ -21,7 +21,7 @@ from cubicweb import Binary, UnknownEid, schema from cubicweb.req import RequestSessionBase from cubicweb.dbapi import ConnectionProperties -from cubicweb.utils import make_uid +from cubicweb.utils import make_uid, RepeatList from cubicweb.rqlrewrite import RQLRewriter ETYPE_PYOBJ_MAP[Binary] = 'Bytes' @@ -829,7 +829,7 @@ selected = rqlst.children[0].selection solution = rqlst.children[0].solutions[0] description = _make_description(selected, args, solution) - return [tuple(description)] * len(result) + return RepeatList(len(result), tuple(description)) # hard, delegate the work :o) return self.manual_build_descr(rqlst, args, result) @@ -858,7 +858,7 @@ etype = rqlst.children[0].solutions[0] basedescription.append(term.get_type(etype, args)) if not todetermine: - return [tuple(basedescription)] * len(result) + return RepeatList(len(result), tuple(basedescription)) return self._build_descr(result, basedescription, todetermine) def _build_descr(self, result, basedescription, todetermine): diff -r 10c3422d7419 -r 135c5d7b89d0 test/unittest_utils.py --- a/test/unittest_utils.py Tue Mar 30 10:50:46 2010 +0200 +++ 
b/test/unittest_utils.py Tue Mar 30 10:57:42 2010 +0200 @@ -11,7 +11,7 @@ import datetime from logilab.common.testlib import TestCase, unittest_main -from cubicweb.utils import make_uid, UStringIO, SizeConstrainedList +from cubicweb.utils import make_uid, UStringIO, SizeConstrainedList, RepeatList try: import simplejson @@ -41,6 +41,44 @@ self.assert_(UStringIO()) +class RepeatListTC(TestCase): + + def test_base(self): + l = RepeatList(3, (1, 3)) + self.assertEquals(l[0], (1, 3)) + self.assertEquals(l[2], (1, 3)) + self.assertEquals(l[-1], (1, 3)) + self.assertEquals(len(l), 3) + # XXX + self.assertEquals(l[4], (1, 3)) + + self.failIf(RepeatList(0, None)) + + def test_slice(self): + l = RepeatList(3, (1, 3)) + self.assertEquals(l[0:1], [(1, 3)]) + self.assertEquals(l[0:4], [(1, 3)]*3) + self.assertEquals(l[:], [(1, 3)]*3) + + def test_iter(self): + self.assertEquals(list(RepeatList(3, (1, 3))), + [(1, 3)]*3) + + def test_add(self): + l = RepeatList(3, (1, 3)) + self.assertEquals(l + [(1, 4)], [(1, 3)]*3 + [(1, 4)]) + self.assertEquals([(1, 4)] + l, [(1, 4)] + [(1, 3)]*3) + self.assertEquals(l + RepeatList(2, (2, 3)), [(1, 3)]*3 + [(2, 3)]*2) + + x = l + RepeatList(2, (1, 3)) + self.assertIsInstance(x, RepeatList) + self.assertEquals(len(x), 5) + self.assertEquals(x[0], (1, 3)) + + x = l + [(1, 3)] * 2 + self.assertEquals(x, [(1, 3)] * 5) + + class SizeConstrainedListTC(TestCase): def test_append(self): diff -r 10c3422d7419 -r 135c5d7b89d0 utils.py --- a/utils.py Tue Mar 30 10:50:46 2010 +0200 +++ b/utils.py Tue Mar 30 10:57:42 2010 +0200 @@ -12,6 +12,7 @@ import decimal import datetime import random +from itertools import repeat from uuid import uuid4 from warnings import warn @@ -101,6 +102,41 @@ __iadd__ = extend +class RepeatList(object): + """fake a list with the same element in each row""" + __slots__ = ('_size', '_item') + def __init__(self, size, item): + self._size = size + self._item = item + def __len__(self): + return self._size + def __nonzero__(self): 
+ return self._size + def __iter__(self): + return repeat(self._item, self._size) + def __getitem__(self, index): + return self._item + def __getslice__(self, i, j): + # XXX could be more efficient, but do we bother? + return ([self._item] * self._size)[i:j] + def __add__(self, other): + if isinstance(other, RepeatList): + if other._item == self._item: + return RepeatList(self._size + other._size, self._item) + return ([self._item] * self._size) + other[:] + return ([self._item] * self._size) + other + def __radd__(self, other): + if isinstance(other, RepeatList): + if other._item == self._item: + return RepeatList(self._size + other._size, self._item) + return other[:] + ([self._item] * self._size) + return other[:] + ([self._item] * self._size) + def __eq__(self, other): + if isinstance(other, RepeatList): + return other._size == self._size and other._item == self._item + return self[:] == other + + class UStringIO(list): """a file wrapper which automatically encode unicode string to an encoding specifed in the constructor