--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rset.py Wed Nov 05 15:52:50 2008 +0100
@@ -0,0 +1,540 @@
+"""The `ResultSet` class which is returned as result of a rql query
+
+:organization: Logilab
+:copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
+"""
+__docformat__ = "restructuredtext en"
+
+from logilab.common.decorators import cached, clear_cache, copy_cache
+
+from rql import nodes
+
+from cubicweb import NotAnEntity
+
+
+class ResultSet(object):
+ """a result set wrap a RQL query result. This object implements a partial
+ list protocol to allow direct use as a list of result rows.
+
+ :type rowcount: int
+ :ivar rowcount: number of rows in the result
+
+ :type rows: list
+ :ivar rows: list of rows of result
+
+ :type description: list
+ :ivar description:
+ result's description, using the same structure as the result itself
+
+ :type rql: str or unicode
+ :ivar rql: the original RQL query string
+ """
+ def __init__(self, results, rql, args=None, description=(), cachekey=None,
+ rqlst=None):
+ self.rows = results
+ self.rowcount = results and len(results) or 0
+ # original query and arguments
+ self.rql = rql
+ self.args = args
+ self.cachekey = cachekey
+ # entity types for each cell (same shape as rows)
+ # maybe discarded if specified when the query has been executed
+ self.description = description
+ # parsed syntax tree
+ if rqlst is not None:
+ rqlst.schema = None # reset schema in case of pyro transfert
+ self._rqlst = rqlst
+ # set to (limit, offset) when a result set is limited using the
+ # .limit method
+ self.limited = None
+ # set by the cursor which returned this resultset
+ self.vreg = None
+ self.req = None
+
+ def __str__(self):
+ if not self.rows:
+ return '<empty resultset %s>' % self.rql
+ return '<resultset %s (%s rows)>' % (self.rql, len(self.rows))
+
+ def __repr__(self):
+ if not self.rows:
+ return '<empty resultset for %s>' % self.rql
+ if not self.description:
+ return '<resultset %s: %s>' % (self.rql, '\n'.join(str(r) for r in self.rows))
+ return '<resultset %s: %s>' % (self.rql,
+ '\n'.join('%s (%s)' % (r, d)
+ for r, d in zip(self.rows, self.description)))
+
+ @cached
+ def possible_actions(self):
+ return self.vreg.possible_vobjects('actions', self.req, self)
+
+ def __len__(self):
+ """returns the result set's size"""
+ return self.rowcount
+
+ def __nonzero__(self):
+ return self.rowcount
+
+ def __getitem__(self, i):
+ """returns the ith element of the result set"""
+ return self.rows[i] #ResultSetRow(self.rows[i])
+
+ def __getslice__(self, i, j):
+ """returns slice [i:j] of the result set"""
+ return self.rows[i:j]
+
+ def __iter__(self):
+ """Returns an iterator over rows"""
+ return iter(self.rows)
+
+ def __add__(self, rset):
+ # XXX buggy implementation (.rql and .args attributes at least much
+ # probably differ)
+ # at least rql could be fixed now that we have union and sub-queries
+ # but I tend to think that since we have that, we should not need this
+ # method anymore (syt)
+ rset = ResultSet(self.rows+rset.rows, self.rql, self.args,
+ self.description +rset.description)
+ return self.req.decorate_rset(rset)
+
+ def _prepare_copy(self, rows, descr):
+ rset = ResultSet(rows, self.rql, self.args, descr)
+ return self.req.decorate_rset(rset)
+
+ def transformed_rset(self, transformcb):
+ """ the result set according to a given column types
+
+ :type transormcb: callable(row, desc)
+ :param transformcb:
+ a callable which should take a row and its type description as
+ parameters, and return the transformed row and type description.
+
+
+ :type col: int
+ :param col: the column index
+
+ :rtype: `ResultSet`
+ """
+ rows, descr = [], []
+ rset = self._prepare_copy(rows, descr)
+ for row, desc in zip(self.rows, self.description):
+ nrow, ndesc = transformcb(row, desc)
+ if ndesc: # transformcb returns None for ndesc to skip that row
+ rows.append(nrow)
+ descr.append(ndesc)
+ rset.rowcount = len(rows)
+ return rset
+
+ def filtered_rset(self, filtercb, col=0):
+ """filter the result set according to a given filtercb
+
+ :type filtercb: callable(entity)
+ :param filtercb:
+ a callable which should take an entity as argument and return
+ False if it should be skipped, else True
+
+ :type col: int
+ :param col: the column index
+
+ :rtype: `ResultSet`
+ """
+ rows, descr = [], []
+ rset = self._prepare_copy(rows, descr)
+ for i in xrange(len(self)):
+ if not filtercb(self.get_entity(i, col)):
+ continue
+ rows.append(self.rows[i])
+ descr.append(self.description[i])
+ rset.rowcount = len(rows)
+ return rset
+
+
+ def sorted_rset(self, keyfunc, reverse=False, col=0):
+ """sorts the result set according to a given keyfunc
+
+ :type keyfunc: callable(entity)
+ :param keyfunc:
+ a callable which should take an entity as argument and return
+ the value used to compare and sort
+
+ :type reverse: bool
+ :param reverse: if the result should be reversed
+
+ :type col: int
+ :param col: the column index. if col = -1, the whole row are used
+
+ :rtype: `ResultSet`
+ """
+ rows, descr = [], []
+ rset = self._prepare_copy(rows, descr)
+ if col >= 0:
+ entities = sorted(enumerate(self.entities(col)),
+ key=lambda (i, e): keyfunc(e), reverse=reverse)
+ else:
+ entities = sorted(enumerate(self),
+ key=lambda (i, e): keyfunc(e), reverse=reverse)
+
+ for index, entity in entities:
+ rows.append(self.rows[index])
+ descr.append(self.description[index])
+ rset.rowcount = len(rows)
+ return rset
+
+ def split_rset(self, keyfunc=None, col=0, return_dict=False):
+ """Splits the result set in multiple result set according to a given key
+
+ :type keyfunc: callable(entity or FinalType)
+ :param keyfunc:
+ a callable which should take a value of the rset in argument and
+ return the value used to group the value. If not define, raw value
+ of the specified columns is used.
+
+ :type col: int
+ :param col: the column index. if col = -1, the whole row are used
+
+ :type return_dict: Boolean
+ :param return_dict: If true, the function return a mapping
+ (key -> rset) instead of a list of rset
+
+ :rtype: List of `ResultSet` or mapping of `ResultSet`
+
+ """
+ result = []
+ mapping = {}
+ for idx, line in enumerate(self):
+ if col >= 0:
+ try:
+ key = self.get_entity(idx,col)
+ except NotAnEntity:
+ key = line[col]
+ else:
+ key = line
+ if keyfunc is not None:
+ key = keyfunc(key)
+
+ if key not in mapping:
+ rows, descr = [], []
+ rset = self._prepare_copy(rows, descr)
+ mapping[key] = rset
+ result.append(rset)
+ else:
+ rset = mapping[key]
+ rset.rows.append(self.rows[idx])
+ rset.description.append(self.description[idx])
+
+
+ for rset in result:
+ rset.rowcount = len(rset.rows)
+ if return_dict:
+ return mapping
+ else:
+ return result
+
+ def limit(self, limit, offset=0, inplace=False):
+ """limit the result set to the given number of rows optionaly starting
+ from an index different than 0
+
+ :type limit: int
+ :param limit: the maximum number of results
+
+ :type offset: int
+ :param offset: the offset index
+
+ :type inplace: bool
+ :param inplace:
+ if true, the result set is modified in place, else a new result set
+ is returned and the original is left unmodified
+
+ :rtype: `ResultSet`
+ """
+ stop = limit+offset
+ rows = self.rows[offset:stop]
+ descr = self.description[offset:stop]
+ if inplace:
+ rset = self
+ rset.rows, rset.description = rows, descr
+ rset.rowcount = len(rows)
+ clear_cache(rset, 'description_struct')
+ if offset:
+ clear_cache(rset, 'get_entity')
+ # we also have to fix/remove from the request entity cache entities
+ # which get a wrong rset reference by this limit call
+ for entity in self.req.cached_entities():
+ if entity.rset is self:
+ if offset <= entity.row < stop:
+ entity.row = entity.row - offset
+ else:
+ self.req.drop_entity_cache(entity.eid)
+ else:
+ rset = self._prepare_copy(rows, descr)
+ if not offset:
+ # can copy built entity caches
+ copy_cache(rset, 'get_entity', self)
+ rset.limited = (limit, offset)
+ return rset
+
+ def printable_rql(self, encoded=False):
+ """return the result set's origin rql as a string, with arguments
+ substitued
+ """
+ encoding = self.req.encoding
+ rqlstr = self.syntax_tree().as_string(encoding, self.args)
+ # sounds like we get encoded or unicode string due to a bug in as_string
+ if not encoded:
+ if isinstance(rqlstr, unicode):
+ return rqlstr
+ return unicode(rqlstr, encoding)
+ else:
+ if isinstance(rqlstr, unicode):
+ return rqlstr.encode(encoding)
+ return rqlstr
+
+ # client helper methods ###################################################
+
+ def entities(self, col=0):
+ """iter on entities with eid in the `col` column of the result set"""
+ for i in xrange(len(self)):
+ # may have None values in case of outer join (or aggregat on eid
+ # hacks)
+ if self.rows[i][col] is not None:
+ yield self.get_entity(i, col)
+
+ @cached
+ def get_entity(self, row, col=None):
+ """special method for query retreiving a single entity, returns a
+ partially initialized Entity instance.
+
+ WARNING: due to the cache wrapping this function, you should NEVER
+ give row as a named parameter (i.e. rset.get_entity(req, 0)
+ is OK but rset.get_entity(row=0, req=req) isn't
+
+ :type row,col: int, int
+ :param row,col:
+ row and col numbers localizing the entity among the result's table
+
+ :return: the partially initialized `Entity` instance
+ """
+ if col is None:
+ from warnings import warn
+ msg = 'col parameter will become mandatory in future version'
+ warn(msg, DeprecationWarning, stacklevel=3)
+ col = 0
+ etype = self.description[row][col]
+ try:
+ eschema = self.vreg.schema.eschema(etype)
+ if eschema.is_final():
+ raise NotAnEntity(etype)
+ except KeyError:
+ raise NotAnEntity(etype)
+ return self._build_entity(row, col)
+
+ def _build_entity(self, row, col, _localcache=None):
+ """internal method to get a single entity, returns a
+ partially initialized Entity instance.
+
+ partially means that only attributes selected in the RQL
+ query will be directly assigned to the entity.
+
+ :type row,col: int, int
+ :param row,col:
+ row and col numbers localizing the entity among the result's table
+
+ :return: the partially initialized `Entity` instance
+ """
+ req = self.req
+ if req is None:
+ raise AssertionError('dont call get_entity with no req on the result set')
+ rowvalues = self.rows[row]
+ eid = rowvalues[col]
+ assert eid is not None
+ # return cached entity if exists. This also avoids potential recursion
+ # XXX should we consider updating a cached entity with possible
+ # new attributes found in this resultset ?
+ try:
+ if hasattr(req, 'is_super_session'):
+ # this is a Session object which is not caching entities, so we
+ # have to use a local cache to avoid recursion pb
+ if _localcache is None:
+ _localcache = {}
+ return _localcache[eid]
+ else:
+ return req.entity_cache(eid)
+ except KeyError:
+ pass
+ # build entity instance
+ etype = self.description[row][col]
+ entity = self.vreg.etype_class(etype)(req, self, row, col)
+ entity.set_eid(eid)
+ # cache entity
+ if _localcache is not None:
+ _localcache[eid] = entity
+ req.set_entity_cache(entity)
+ eschema = entity.e_schema
+ # try to complete the entity if there are some additional columns
+ if len(rowvalues) > 1:
+ rqlst = self.syntax_tree()
+ if rqlst.TYPE == 'select':
+ # UNION query, find the subquery from which this entity has been
+ # found
+ rqlst = rqlst.locate_subquery(col, etype, self.args)
+ # take care, due to outer join support, we may find None
+ # values for non final relation
+ for i, attr, x in attr_desc_iterator(rqlst, col):
+ if x == 'subject':
+ rschema = eschema.subject_relation(attr)
+ if rschema.is_final():
+ entity[attr] = rowvalues[i]
+ continue
+ tetype = rschema.objects(etype)[0]
+ card = rschema.rproperty(etype, tetype, 'cardinality')[0]
+ else:
+ rschema = eschema.object_relation(attr)
+ tetype = rschema.subjects(etype)[0]
+ card = rschema.rproperty(tetype, etype, 'cardinality')[1]
+ # only keep value if it can't be multivalued
+ if card in '1?':
+ if rowvalues[i] is None:
+ if x == 'subject':
+ rql = 'Any Y WHERE X %s Y, X eid %s'
+ else:
+ rql = 'Any Y WHERE Y %s X, X eid %s'
+ rrset = ResultSet([], rql % (attr, entity.eid))
+ req.decorate_rset(rrset)
+ else:
+ rrset = self._build_entity(row, i, _localcache).as_rset()
+ entity.set_related_cache(attr, x, rrset)
+ return entity
+
+ @cached
+ def syntax_tree(self):
+ """get the syntax tree for the source query.
+
+ :rtype: rql.stmts.Statement
+ :return: the RQL syntax tree of the originating query
+ """
+ if self._rqlst:
+ rqlst = self._rqlst.copy()
+ # to avoid transport overhead when pyro is used, the schema has been
+ # unset from the syntax tree
+ rqlst.schema = self.vreg.schema
+ self.vreg.rqlhelper.annotate(rqlst)
+ else:
+ rqlst = self.vreg.parse(self.req, self.rql, self.args)
+ return rqlst
+
+ @cached
+ def column_types(self, col):
+ """return the list of different types in the column with the given col
+ index default to 0 (ie the first column)
+
+ :type col: int
+ :param col: the index of the desired column
+
+ :rtype: list
+ :return: the different entities type found in the column
+ """
+ return frozenset(struc[-1][col] for struc in self.description_struct())
+
+ @cached
+ def description_struct(self):
+ """return a list describing sequence of results with the same
+ description, e.g. :
+ [[0, 4, ('Bug',)]
+ [[0, 4, ('Bug',), [5, 8, ('Story',)]
+ [[0, 3, ('Project', 'Version',)]]
+ """
+ result = []
+ last = None
+ for i, row in enumerate(self.description):
+ if row != last:
+ if last is not None:
+ result[-1][1] = i - 1
+ result.append( [i, None, row] )
+ last = row
+ if last is not None:
+ result[-1][1] = i
+ return result
+
+ @cached
+ def related_entity(self, row, col):
+ """try to get the related entity to extract format information if any"""
+ locate_query_col = col
+ rqlst = self.syntax_tree()
+ etype = self.description[row][col]
+ if self.vreg.schema.eschema(etype).is_final():
+ # final type, find a better (ambiguous) one
+ for i in xrange(len(rqlst.children[0].selection)):
+ if i == col:
+ continue
+ coletype = self.description[row][i]
+ if coletype is None:
+ continue
+ if not self.vreg.schema.eschema(coletype).is_final():
+ etype = coletype
+ locate_query_col = i
+ if len(self.column_types(i)) > 1:
+ break
+ # UNION query, find the subquery from which this entity has been
+ # found
+ select = rqlst.locate_subquery(locate_query_col, etype, self.args)
+ try:
+ myvar = select.selection[col].variable
+ except AttributeError:
+ # no .selection attribute is available
+ return None, None
+ rel = myvar.main_relation()
+ if rel is not None:
+ index = rel.children[0].variable.selected_index()
+ if index is not None:
+ return self.get_entity(row, index), rel.r_type
+ return None, None
+
+ @cached
+ def searched_text(self):
+ """returns the searched text in case of full-text search
+
+ :return: searched text or `None` if the query is not
+ a full-text query
+ """
+ rqlst = self.syntax_tree()
+ for rel in rqlst.iget_nodes(nodes.Relation):
+ if rel.r_type == 'has_text':
+ __, rhs = rel.get_variable_parts()
+ return rhs.eval(self.args)
+ return None
+
+
+def attr_desc_iterator(rqlst, index=0):
+ """return an iterator on a list of 2-uple (index, attr_relation)
+ localizing attribute relations of the main variable in a result's row
+
+ :type rqlst: rql.stmts.Select
+ :param rqlst: the RQL syntax tree to describe
+
+ :return:
+ a generator on (index, relation, target) describing column being
+ attribute of the main variable
+ """
+ main = rqlst.selection[index]
+ for i, term in enumerate(rqlst.selection):
+ if i == index:
+ continue
+ try:
+ # XXX rewritten const
+ var = term.variable
+ except AttributeError:
+ continue
+ #varname = var.name
+ for ref in var.references():
+ rel = ref.relation()
+ if rel is None or rel.is_types_restriction():
+ continue
+ lhs, rhs = rel.get_variable_parts()
+ if main.is_equivalent(lhs):
+ if rhs.is_equivalent(term):
+ yield (i, rel.r_type, 'subject')
+ elif main.is_equivalent(rhs):
+ if lhs.is_equivalent(term):
+ yield (i, rel.r_type, 'object')