# Extracted from Mercurial changeset b97547f5f1fa (file rset.py, added
# Wed Nov 05 15:52:50 2008 +0100).
"""The `ResultSet` class which is returned as result of a rql query

:organization: Logilab
:copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
"""
__docformat__ = "restructuredtext en"

from logilab.common.decorators import cached, clear_cache, copy_cache

from rql import nodes

from cubicweb import NotAnEntity


class ResultSet(object):
    """A result set wraps a RQL query result. This object implements a partial
    list protocol to allow direct use as a list of result rows.

    :type rowcount: int
    :ivar rowcount: number of rows in the result

    :type rows: list
    :ivar rows: list of rows of result

    :type description: list
    :ivar description:
      result's description, using the same structure as the result itself

    :type rql: str or unicode
    :ivar rql: the original RQL query string
    """
    def __init__(self, results, rql, args=None, description=(), cachekey=None,
                 rqlst=None):
        """Store the raw rows together with the query that produced them.

        :param results: the rows of the result (may be empty or None)
        :param rql: the original RQL query string
        :param args: the arguments given with the query, if any
        :param description:
          entity types for each cell, with the same shape as `results`
        :param cachekey: stored as is on the instance
        :param rqlst:
          the parsed syntax tree of the query, if already available; its
          `schema` attribute is reset to None
        """
        self.rows = results
        # `and/or` is safe here: the fallback value is the falsy result itself
        self.rowcount = results and len(results) or 0
        # original query and arguments
        self.rql = rql
        self.args = args
        self.cachekey = cachekey
        # entity types for each cell (same shape as rows)
        # maybe discarded if specified when the query has been executed
        self.description = description
        # parsed syntax tree
        if rqlst is not None:
            rqlst.schema = None # reset schema in case of pyro transfert
        self._rqlst = rqlst
        # set to (limit, offset) when a result set is limited using the
        # .limit method
        self.limited = None
        # set by the cursor which returned this resultset
        self.vreg = None
        self.req = None

    # NOTE(review): the format literals of __str__ / __repr__ were empty
    # strings in the reviewed source, which made every `%` below raise
    # TypeError ("not all arguments converted"). The literals used here are a
    # reconstruction -- confirm the wording against the upstream repository.

    def __str__(self):
        """Return a short description: the query and the number of rows."""
        if not self.rows:
            return '<empty resultset %s>' % self.rql
        return '<resultset %s (%s rows)>' % (self.rql, len(self.rows))

    def __repr__(self):
        """Return the query followed by one line per row.

        Each row is followed by its description when one is available.
        """
        if not self.rows:
            return '<empty resultset for %s>' % self.rql
        if not self.description:
            return '<resultset %s: %s>' % (
                self.rql, '\n'.join(str(r) for r in self.rows))
        return '<resultset %s: %s>' % (
            self.rql,
            '\n'.join('%s (%s)' % (r, d)
                      for r, d in zip(self.rows, self.description)))

    @cached
    def possible_actions(self):
        """Return the 'actions' vobjects the registry selects for this rset."""
        return self.vreg.possible_vobjects('actions', self.req, self)

    def __len__(self):
        """returns the result set's size"""
        return self.rowcount

    def __nonzero__(self):
        """A result set is true when it has at least one row."""
        return self.rowcount

    def __getitem__(self, i):
        """returns the ith element of the result set"""
        return self.rows[i]

    def __getslice__(self, i, j):
        """returns slice [i:j] of the result set"""
        return self.rows[i:j]

    def __iter__(self):
        """Returns an iterator over rows"""
        return iter(self.rows)

    def __add__(self, rset):
        """Return a new result set holding the rows of both operands.

        The new result set keeps the `rql` and `args` of the left operand.
        """
        # XXX buggy implementation (.rql and .args attributes at least much
        # probably differ)
        # at least rql could be fixed now that we have union and sub-queries
        # but I tend to think that since we have that, we should not need this
        # method anymore (syt)
        rset = ResultSet(self.rows + rset.rows, self.rql, self.args,
                         self.description + rset.description)
        return self.req.decorate_rset(rset)

    def _prepare_copy(self, rows, descr):
        """Build a decorated result set for the same query with other rows.

        `rows` and `descr` are stored as is (not copied), so callers may keep
        filling them after this call.
        """
        rset = ResultSet(rows, self.rql, self.args, descr)
        return self.req.decorate_rset(rset)

    def transformed_rset(self, transformcb):
        """Transform each row of the result set using a given callback.

        :type transformcb: callable(row, desc)
        :param transformcb:
          a callable which should take a row and its type description as
          parameters, and return the transformed row and type description.
          Rows for which the returned description is false (e.g. None) are
          skipped.

        :rtype: `ResultSet`
        """
        rows, descr = [], []
        rset = self._prepare_copy(rows, descr)
        for row, desc in zip(self.rows, self.description):
            nrow, ndesc = transformcb(row, desc)
            if ndesc: # transformcb returns None for ndesc to skip that row
                rows.append(nrow)
                descr.append(ndesc)
        rset.rowcount = len(rows)
        return rset

    def filtered_rset(self, filtercb, col=0):
        """filter the result set according to a given filtercb

        :type filtercb: callable(entity)
        :param filtercb:
          a callable which should take an entity as argument and return
          False if it should be skipped, else True

        :type col: int
        :param col: the column index

        :rtype: `ResultSet`
        """
        rows, descr = [], []
        rset = self._prepare_copy(rows, descr)
        for i in xrange(len(self)):
            if not filtercb(self.get_entity(i, col)):
                continue
            rows.append(self.rows[i])
            descr.append(self.description[i])
        rset.rowcount = len(rows)
        return rset

    def sorted_rset(self, keyfunc, reverse=False, col=0):
        """sorts the result set according to a given keyfunc

        :type keyfunc: callable(entity)
        :param keyfunc:
          a callable which should take an entity as argument and return
          the value used to compare and sort

        :type reverse: bool
        :param reverse: if the result should be reversed

        :type col: int
        :param col:
          the column index. if col = -1, the whole row are used. Otherwise
          rows holding None in that column are left out of the result.

        :rtype: `ResultSet`
        """
        rows, descr = [], []
        rset = self._prepare_copy(rows, descr)
        if col >= 0:
            # Pair each entity with its real row index. Numbering the output
            # of `entities()` instead would shift every index located after
            # a None value, since `entities()` skips those rows.
            items = [(i, self.get_entity(i, col))
                     for i in xrange(len(self))
                     if self.rows[i][col] is not None]
        else:
            items = list(enumerate(self))
        # index-based key instead of a tuple-unpacking lambda, which is
        # Python 2 only syntax
        items = sorted(items, key=lambda item: keyfunc(item[1]),
                       reverse=reverse)
        for index, _ in items:
            rows.append(self.rows[index])
            descr.append(self.description[index])
        rset.rowcount = len(rows)
        return rset

    def split_rset(self, keyfunc=None, col=0, return_dict=False):
        """Splits the result set in multiple result set according to a given key

        :type keyfunc: callable(entity or FinalType)
        :param keyfunc:
          a callable which should take a value of the rset in argument and
          return the value used to group the value. If not define, raw value
          of the specified columns is used.

        :type col: int
        :param col: the column index. if col = -1, the whole row are used

        :type return_dict: Boolean
        :param return_dict: If true, the function return a mapping
          (key -> rset) instead of a list of rset

        :rtype: List of `ResultSet` or mapping of `ResultSet`
        """
        result = []
        mapping = {}
        for idx, line in enumerate(self):
            if col >= 0:
                try:
                    key = self.get_entity(idx, col)
                except NotAnEntity:
                    # final value: group on the raw cell
                    key = line[col]
            else:
                key = line
            if keyfunc is not None:
                key = keyfunc(key)
            # the key is used as a dictionary key, so it must be hashable
            if key not in mapping:
                rows, descr = [], []
                rset = self._prepare_copy(rows, descr)
                mapping[key] = rset
                # `result` keeps the groups in order of first appearance
                result.append(rset)
            else:
                rset = mapping[key]
            rset.rows.append(self.rows[idx])
            rset.description.append(self.description[idx])
        for rset in result:
            rset.rowcount = len(rset.rows)
        if return_dict:
            return mapping
        else:
            return result

    def limit(self, limit, offset=0, inplace=False):
        """limit the result set to the given number of rows optionally
        starting from an index different than 0

        :type limit: int
        :param limit: the maximum number of results

        :type offset: int
        :param offset: the offset index

        :type inplace: bool
        :param inplace:
          if true, the result set is modified in place, else a new result set
          is returned and the original is left unmodified

        :rtype: `ResultSet`
        """
        stop = limit + offset
        rows = self.rows[offset:stop]
        descr = self.description[offset:stop]
        if inplace:
            rset = self
            rset.rows, rset.description = rows, descr
            rset.rowcount = len(rows)
            clear_cache(rset, 'description_struct')
            if offset:
                # row numbers changed, cached entities are keyed on them
                clear_cache(rset, 'get_entity')
            # we also have to fix/remove from the request entity cache entities
            # which get a wrong rset reference by this limit call
            for entity in self.req.cached_entities():
                if entity.rset is self:
                    if offset <= entity.row < stop:
                        entity.row = entity.row - offset
                    else:
                        self.req.drop_entity_cache(entity.eid)
        else:
            rset = self._prepare_copy(rows, descr)
            if not offset:
                # can copy built entity caches
                copy_cache(rset, 'get_entity', self)
        rset.limited = (limit, offset)
        return rset

    def printable_rql(self, encoded=False):
        """return the result set's origin rql as a string, with arguments
        substituted

        :type encoded: bool
        :param encoded:
          if true, return a string encoded using the request's encoding,
          else return an unicode string
        """
        encoding = self.req.encoding
        rqlstr = self.syntax_tree().as_string(encoding, self.args)
        # sounds like we get encoded or unicode string due to a bug in as_string
        if not encoded:
            if isinstance(rqlstr, unicode):
                return rqlstr
            return unicode(rqlstr, encoding)
        else:
            if isinstance(rqlstr, unicode):
                return rqlstr.encode(encoding)
            return rqlstr

    # client helper methods ###################################################

    def entities(self, col=0):
        """iter on entities with eid in the `col` column of the result set"""
        for i in xrange(len(self)):
            # may have None values in case of outer join (or aggregat on eid
            # hacks)
            if self.rows[i][col] is not None:
                yield self.get_entity(i, col)

    @cached
    def get_entity(self, row, col=None):
        """special method for query retrieving a single entity, returns a
        partially initialized Entity instance.

        WARNING: due to the cache wrapping this function, you should NEVER
        give row as a named parameter (i.e. rset.get_entity(0, 0) is OK but
        rset.get_entity(row=0, col=0) isn't)

        :type row,col: int, int
        :param row,col:
          row and col numbers localizing the entity among the result's table.
          Omitting `col` is deprecated: a DeprecationWarning is emitted and
          column 0 is used.

        :raise NotAnEntity:
          if the type of the cell is a final type or is unknown to the schema

        :return: the partially initialized `Entity` instance
        """
        if col is None:
            from warnings import warn
            msg = 'col parameter will become mandatory in future version'
            warn(msg, DeprecationWarning, stacklevel=3)
            col = 0
        etype = self.description[row][col]
        try:
            eschema = self.vreg.schema.eschema(etype)
            if eschema.is_final():
                raise NotAnEntity(etype)
        except KeyError:
            raise NotAnEntity(etype)
        return self._build_entity(row, col)

    def _build_entity(self, row, col, _localcache=None):
        """internal method to get a single entity, returns a
        partially initialized Entity instance.

        partially means that only attributes selected in the RQL
        query will be directly assigned to the entity.

        :type row,col: int, int
        :param row,col:
          row and col numbers localizing the entity among the result's table

        :param _localcache:
          eid -> entity mapping shared between recursive calls when the
          request does not cache entities itself; callers should not set it

        :return: the partially initialized `Entity` instance
        """
        req = self.req
        if req is None:
            raise AssertionError('dont call get_entity with no req on the result set')
        rowvalues = self.rows[row]
        eid = rowvalues[col]
        assert eid is not None
        # return cached entity if exists. This also avoids potential recursion
        # XXX should we consider updating a cached entity with possible
        # new attributes found in this resultset ?
        try:
            if hasattr(req, 'is_super_session'):
                # this is a Session object which is not caching entities, so we
                # have to use a local cache to avoid recursion pb
                if _localcache is None:
                    _localcache = {}
                return _localcache[eid]
            else:
                return req.entity_cache(eid)
        except KeyError:
            pass
        # build entity instance
        etype = self.description[row][col]
        entity = self.vreg.etype_class(etype)(req, self, row, col)
        entity.set_eid(eid)
        # cache entity
        if _localcache is not None:
            _localcache[eid] = entity
        req.set_entity_cache(entity)
        eschema = entity.e_schema
        # try to complete the entity if there are some additional columns
        if len(rowvalues) > 1:
            rqlst = self.syntax_tree()
            if rqlst.TYPE == 'select':
                # UNION query, find the subquery from which this entity has been
                # found
                rqlst = rqlst.locate_subquery(col, etype, self.args)
            # take care, due to outer join support, we may find None
            # values for non final relation
            for i, attr, x in attr_desc_iterator(rqlst, col):
                if x == 'subject':
                    rschema = eschema.subject_relation(attr)
                    if rschema.is_final():
                        # plain attribute: store the value on the entity
                        entity[attr] = rowvalues[i]
                        continue
                    tetype = rschema.objects(etype)[0]
                    card = rschema.rproperty(etype, tetype, 'cardinality')[0]
                else:
                    rschema = eschema.object_relation(attr)
                    tetype = rschema.subjects(etype)[0]
                    card = rschema.rproperty(tetype, etype, 'cardinality')[1]
                # only keep value if it can't be multivalued
                if card in '1?':
                    if rowvalues[i] is None:
                        # no related entity: cache an empty result set
                        if x == 'subject':
                            rql = 'Any Y WHERE X %s Y, X eid %s'
                        else:
                            rql = 'Any Y WHERE Y %s X, X eid %s'
                        rrset = ResultSet([], rql % (attr, entity.eid))
                        req.decorate_rset(rrset)
                    else:
                        rrset = self._build_entity(row, i, _localcache).as_rset()
                    entity.set_related_cache(attr, x, rrset)
        return entity

    @cached
    def syntax_tree(self):
        """get the syntax tree for the source query.

        :rtype: rql.stmts.Statement
        :return: the RQL syntax tree of the originating query
        """
        if self._rqlst:
            rqlst = self._rqlst.copy()
            # to avoid transport overhead when pyro is used, the schema has been
            # unset from the syntax tree
            rqlst.schema = self.vreg.schema
            self.vreg.rqlhelper.annotate(rqlst)
        else:
            rqlst = self.vreg.parse(self.req, self.rql, self.args)
        return rqlst

    @cached
    def column_types(self, col):
        """return the set of different types found in the column with the
        given index

        :type col: int
        :param col: the index of the desired column

        :rtype: frozenset
        :return: the different entities type found in the column
        """
        return frozenset(struc[-1][col] for struc in self.description_struct())

    @cached
    def description_struct(self):
        """return a list describing sequences of consecutive results sharing
        the same description, as [first index, last index, description]
        items, e.g. :

          [[0, 4, ('Bug',)]]
          [[0, 4, ('Bug',)], [5, 8, ('Story',)]]
          [[0, 3, ('Project', 'Version',)]]
        """
        result = []
        last = None
        for i, row in enumerate(self.description):
            if row != last:
                if last is not None:
                    # close the previous sequence on the preceding row
                    result[-1][1] = i - 1
                result.append([i, None, row])
                last = row
        if last is not None:
            # close the last sequence on the last row
            result[-1][1] = i
        return result

    @cached
    def related_entity(self, row, col):
        """try to get the related entity to extract format information if any

        :return:
          a 2-uple (entity, relation type), or (None, None) when no related
          entity can be found
        """
        locate_query_col = col
        rqlst = self.syntax_tree()
        etype = self.description[row][col]
        if self.vreg.schema.eschema(etype).is_final():
            # final type, find a better (ambiguous) one
            for i in xrange(len(rqlst.children[0].selection)):
                if i == col:
                    continue
                coletype = self.description[row][i]
                if coletype is None:
                    continue
                if not self.vreg.schema.eschema(coletype).is_final():
                    etype = coletype
                    locate_query_col = i
                    if len(self.column_types(i)) > 1:
                        break
        # UNION query, find the subquery from which this entity has been
        # found
        select = rqlst.locate_subquery(locate_query_col, etype, self.args)
        try:
            myvar = select.selection[col].variable
        except AttributeError:
            # no .selection attribute is available
            return None, None
        rel = myvar.main_relation()
        if rel is not None:
            index = rel.children[0].variable.selected_index()
            if index is not None:
                return self.get_entity(row, index), rel.r_type
        return None, None

    @cached
    def searched_text(self):
        """returns the searched text in case of full-text search

        :return: searched text or `None` if the query is not
                 a full-text query
        """
        rqlst = self.syntax_tree()
        for rel in rqlst.iget_nodes(nodes.Relation):
            if rel.r_type == 'has_text':
                __, rhs = rel.get_variable_parts()
                return rhs.eval(self.args)
        return None


def attr_desc_iterator(rqlst, index=0):
    """return an iterator on a list of 3-uple (index, relation type, role)
    localizing attribute relations of the main variable in a result's row

    :type rqlst: rql.stmts.Select
    :param rqlst: the RQL syntax tree to describe

    :type index: int
    :param index: the index of the main variable in the selection

    :return:
      a generator on (index, relation, target) describing column being
      attribute of the main variable; target is 'subject' when the main
      variable is the left hand side of the relation, else 'object'
    """
    main = rqlst.selection[index]
    for i, term in enumerate(rqlst.selection):
        if i == index:
            continue
        try:
            # XXX rewritten const
            var = term.variable
        except AttributeError:
            # the selected term has no `variable` attribute: skip it
            continue
        for ref in var.references():
            rel = ref.relation()
            if rel is None or rel.is_types_restriction():
                continue
            lhs, rhs = rel.get_variable_parts()
            if main.is_equivalent(lhs):
                if rhs.is_equivalent(term):
                    yield (i, rel.r_type, 'subject')
            elif main.is_equivalent(rhs):
                if lhs.is_equivalent(term):
                    yield (i, rel.r_type, 'object')