rset.py
changeset 0 b97547f5f1fa
child 170 455ff18ef28e
equal deleted inserted replaced
-1:000000000000 0:b97547f5f1fa
       
     1 """The `ResultSet` class which is returned as result of a rql query
       
     2 
       
     3 :organization: Logilab
       
     4 :copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
       
     5 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
       
     6 """
       
     7 __docformat__ = "restructuredtext en"
       
     8 
       
     9 from logilab.common.decorators import cached, clear_cache, copy_cache
       
    10 
       
    11 from rql import nodes
       
    12 
       
    13 from cubicweb import NotAnEntity
       
    14     
       
    15 
       
    16 class ResultSet(object):
       
    17     """a result set wrap a RQL query result. This object implements a partial
       
    18     list protocol to allow direct use as a list of result rows.
       
    19 
       
    20     :type rowcount: int
       
    21     :ivar rowcount: number of rows in the result
       
    22 
       
    23     :type rows: list
       
    24     :ivar rows: list of rows of result
       
    25 
       
    26     :type description: list
       
    27     :ivar description:
       
    28       result's description, using the same structure as the result itself
       
    29 
       
    30     :type rql: str or unicode
       
    31     :ivar rql: the original RQL query string
       
    32     """
       
    33     def __init__(self, results, rql, args=None, description=(), cachekey=None,
       
    34                  rqlst=None):
       
    35         self.rows = results
       
    36         self.rowcount = results and len(results) or 0
       
    37         # original query and arguments
       
    38         self.rql = rql
       
    39         self.args = args
       
    40         self.cachekey = cachekey
       
    41         # entity types for each cell (same shape as rows)
       
    42         # maybe discarded if specified when the query has been executed
       
    43         self.description = description
       
    44         # parsed syntax tree
       
    45         if rqlst is not None:
       
    46             rqlst.schema = None # reset schema in case of pyro transfert
       
    47         self._rqlst = rqlst
       
    48         # set to (limit, offset) when a result set is limited using the
       
    49         # .limit method
       
    50         self.limited = None
       
    51         # set by the cursor which returned this resultset
       
    52         self.vreg = None
       
    53         self.req = None
       
    54    
       
    55     def __str__(self):
       
    56         if not self.rows:
       
    57             return '<empty resultset %s>' % self.rql
       
    58         return '<resultset %s (%s rows)>' % (self.rql, len(self.rows))
       
    59     
       
    60     def __repr__(self):
       
    61         if not self.rows:
       
    62             return '<empty resultset for %s>' % self.rql
       
    63         if not self.description:
       
    64             return '<resultset %s: %s>' % (self.rql, '\n'.join(str(r) for r in self.rows))
       
    65         return '<resultset %s: %s>' % (self.rql,
       
    66                                        '\n'.join('%s (%s)' % (r, d)
       
    67                                                  for r, d in zip(self.rows, self.description)))
       
    68 
       
    69     @cached
       
    70     def possible_actions(self):
       
    71         return self.vreg.possible_vobjects('actions', self.req, self)
       
    72     
       
    73     def __len__(self):
       
    74         """returns the result set's size"""
       
    75         return self.rowcount
       
    76 
       
    77     def __nonzero__(self):
       
    78         return self.rowcount
       
    79     
       
    80     def __getitem__(self, i):
       
    81         """returns the ith element of the result set"""
       
    82         return self.rows[i] #ResultSetRow(self.rows[i])
       
    83     
       
    84     def __getslice__(self, i, j):
       
    85         """returns slice [i:j] of the result set"""
       
    86         return self.rows[i:j]
       
    87         
       
    88     def __iter__(self):
       
    89         """Returns an iterator over rows"""
       
    90         return iter(self.rows)
       
    91 
       
    92     def __add__(self, rset):
       
    93         # XXX buggy implementation (.rql and .args attributes at least much
       
    94         # probably differ)
       
    95         # at least rql could be fixed now that we have union and sub-queries
       
    96         # but I tend to think that since we have that, we should not need this
       
    97         # method anymore (syt)
       
    98         rset = ResultSet(self.rows+rset.rows, self.rql, self.args,
       
    99                          self.description +rset.description)
       
   100         return self.req.decorate_rset(rset)
       
   101 
       
   102     def _prepare_copy(self, rows, descr):
       
   103         rset = ResultSet(rows, self.rql, self.args, descr)
       
   104         return self.req.decorate_rset(rset)
       
   105 
       
   106     def transformed_rset(self, transformcb):
       
   107         """ the result set according to a given column types
       
   108 
       
   109         :type transormcb: callable(row, desc)
       
   110         :param transformcb:
       
   111           a callable which should take a row and its type description as
       
   112           parameters, and return the transformed row and type description.
       
   113           
       
   114 
       
   115         :type col: int
       
   116         :param col: the column index
       
   117 
       
   118         :rtype: `ResultSet`
       
   119         """
       
   120         rows, descr = [], []
       
   121         rset = self._prepare_copy(rows, descr)
       
   122         for row, desc in zip(self.rows, self.description):
       
   123             nrow, ndesc = transformcb(row, desc)
       
   124             if ndesc: # transformcb returns None for ndesc to skip that row
       
   125                 rows.append(nrow)
       
   126                 descr.append(ndesc)
       
   127         rset.rowcount = len(rows)
       
   128         return rset
       
   129 
       
   130     def filtered_rset(self, filtercb, col=0):
       
   131         """filter the result set according to a given filtercb
       
   132 
       
   133         :type filtercb: callable(entity)
       
   134         :param filtercb:
       
   135           a callable which should take an entity as argument and return
       
   136           False if it should be skipped, else True
       
   137 
       
   138         :type col: int
       
   139         :param col: the column index
       
   140 
       
   141         :rtype: `ResultSet`
       
   142         """
       
   143         rows, descr = [], []
       
   144         rset = self._prepare_copy(rows, descr)
       
   145         for i in xrange(len(self)):
       
   146             if not filtercb(self.get_entity(i, col)):
       
   147                 continue
       
   148             rows.append(self.rows[i])
       
   149             descr.append(self.description[i])
       
   150         rset.rowcount = len(rows)
       
   151         return rset
       
   152 
       
   153 
       
   154     def sorted_rset(self, keyfunc, reverse=False, col=0):
       
   155         """sorts the result set according to a given keyfunc
       
   156 
       
   157         :type keyfunc: callable(entity)
       
   158         :param keyfunc:
       
   159           a callable which should take an entity as argument and return
       
   160           the value used to compare and sort
       
   161 
       
   162         :type reverse: bool
       
   163         :param reverse: if the result should be reversed
       
   164 
       
   165         :type col: int
       
   166         :param col: the column index. if col = -1, the whole row are used
       
   167 
       
   168         :rtype: `ResultSet`
       
   169         """
       
   170         rows, descr = [], []
       
   171         rset = self._prepare_copy(rows, descr)
       
   172         if col >= 0:
       
   173             entities = sorted(enumerate(self.entities(col)),
       
   174                               key=lambda (i, e): keyfunc(e), reverse=reverse)
       
   175         else:
       
   176             entities = sorted(enumerate(self),
       
   177                               key=lambda (i, e): keyfunc(e), reverse=reverse)
       
   178 
       
   179         for index, entity in entities:
       
   180             rows.append(self.rows[index])
       
   181             descr.append(self.description[index])
       
   182         rset.rowcount = len(rows)
       
   183         return rset
       
   184 
       
   185     def split_rset(self, keyfunc=None, col=0, return_dict=False):
       
   186         """Splits the result set in multiple result set according to a given key
       
   187     
       
   188         :type keyfunc: callable(entity or FinalType)
       
   189         :param keyfunc:
       
   190           a callable which should take a value of the rset in argument and
       
   191           return the value used to group the value. If not define, raw value
       
   192           of the specified columns is used.
       
   193 
       
   194         :type col: int
       
   195         :param col: the column index. if col = -1, the whole row are used
       
   196 
       
   197         :type return_dict: Boolean
       
   198         :param return_dict: If true, the function return a mapping
       
   199             (key -> rset) instead of a list of rset
       
   200 
       
   201         :rtype: List of `ResultSet` or mapping of  `ResultSet`
       
   202 
       
   203         """
       
   204         result = []
       
   205         mapping = {}
       
   206         for idx, line in enumerate(self):
       
   207             if col >= 0:
       
   208                 try:
       
   209                     key = self.get_entity(idx,col)
       
   210                 except NotAnEntity:
       
   211                     key = line[col]
       
   212             else:
       
   213                 key = line
       
   214             if keyfunc is not None:
       
   215                 key = keyfunc(key)
       
   216 
       
   217             if key not in mapping:
       
   218                 rows, descr = [], []
       
   219                 rset = self._prepare_copy(rows, descr)
       
   220                 mapping[key] = rset
       
   221                 result.append(rset)
       
   222             else:
       
   223                 rset = mapping[key]
       
   224             rset.rows.append(self.rows[idx])
       
   225             rset.description.append(self.description[idx])
       
   226 
       
   227 
       
   228         for rset in result:
       
   229             rset.rowcount = len(rset.rows)
       
   230         if return_dict:
       
   231             return mapping
       
   232         else:
       
   233             return result
       
   234 
       
   235     def limit(self, limit, offset=0, inplace=False):
       
   236         """limit the result set to the given number of rows optionaly starting
       
   237         from an index different than 0
       
   238 
       
   239         :type limit: int
       
   240         :param limit: the maximum number of results
       
   241 
       
   242         :type offset: int
       
   243         :param offset: the offset index
       
   244         
       
   245         :type inplace: bool
       
   246         :param inplace:
       
   247           if true, the result set is modified in place, else a new result set
       
   248           is returned and the original is left unmodified
       
   249 
       
   250         :rtype: `ResultSet`
       
   251         """
       
   252         stop = limit+offset
       
   253         rows = self.rows[offset:stop]
       
   254         descr = self.description[offset:stop]
       
   255         if inplace:
       
   256             rset = self
       
   257             rset.rows, rset.description = rows, descr
       
   258             rset.rowcount = len(rows)
       
   259             clear_cache(rset, 'description_struct')
       
   260             if offset:
       
   261                 clear_cache(rset, 'get_entity')
       
   262             # we also have to fix/remove from the request entity cache entities
       
   263             # which get a wrong rset reference by this limit call
       
   264             for entity in self.req.cached_entities():
       
   265                 if entity.rset is self:
       
   266                     if offset <= entity.row < stop:
       
   267                         entity.row = entity.row - offset
       
   268                     else:
       
   269                         self.req.drop_entity_cache(entity.eid)
       
   270         else:
       
   271             rset = self._prepare_copy(rows, descr)
       
   272             if not offset:
       
   273                 # can copy built entity caches
       
   274                 copy_cache(rset, 'get_entity', self)
       
   275         rset.limited = (limit, offset)
       
   276         return rset
       
   277     
       
   278     def printable_rql(self, encoded=False):
       
   279         """return the result set's origin rql as a string, with arguments
       
   280         substitued
       
   281         """
       
   282         encoding = self.req.encoding
       
   283         rqlstr = self.syntax_tree().as_string(encoding, self.args)
       
   284         # sounds like we get encoded or unicode string due to a bug in as_string
       
   285         if not encoded:
       
   286             if isinstance(rqlstr, unicode):
       
   287                 return rqlstr
       
   288             return unicode(rqlstr, encoding)
       
   289         else: 
       
   290             if isinstance(rqlstr, unicode):
       
   291                 return rqlstr.encode(encoding)
       
   292             return rqlstr
       
   293        
       
   294     # client helper methods ###################################################
       
   295 
       
   296     def entities(self, col=0):
       
   297         """iter on entities with eid in the `col` column of the result set"""
       
   298         for i in xrange(len(self)):
       
   299             # may have None values in case of outer join (or aggregat on eid
       
   300             # hacks)
       
   301             if self.rows[i][col] is not None:
       
   302                 yield self.get_entity(i, col)
       
   303 
       
   304     @cached
       
   305     def get_entity(self, row, col=None):
       
   306         """special method for query retreiving a single entity, returns a
       
   307         partially initialized Entity instance.
       
   308         
       
   309         WARNING: due to the cache wrapping this function, you should NEVER
       
   310                  give row as a named parameter (i.e. rset.get_entity(req, 0)
       
   311                  is OK but rset.get_entity(row=0, req=req) isn't
       
   312 
       
   313         :type row,col: int, int
       
   314         :param row,col:
       
   315           row and col numbers localizing the entity among the result's table
       
   316 
       
   317         :return: the partially initialized `Entity` instance
       
   318         """
       
   319         if col is None:
       
   320             from warnings import warn
       
   321             msg = 'col parameter will become mandatory in future version'
       
   322             warn(msg, DeprecationWarning, stacklevel=3)
       
   323             col = 0
       
   324         etype = self.description[row][col]
       
   325         try:
       
   326             eschema = self.vreg.schema.eschema(etype)
       
   327             if eschema.is_final():
       
   328                 raise NotAnEntity(etype)
       
   329         except KeyError:
       
   330             raise NotAnEntity(etype)
       
   331         return self._build_entity(row, col)
       
   332 
       
   333     def _build_entity(self, row, col, _localcache=None):
       
   334         """internal method to get a single entity, returns a
       
   335         partially initialized Entity instance.
       
   336 
       
   337         partially means that only attributes selected in the RQL
       
   338         query will be directly assigned to the entity.
       
   339         
       
   340         :type row,col: int, int
       
   341         :param row,col:
       
   342           row and col numbers localizing the entity among the result's table
       
   343 
       
   344         :return: the partially initialized `Entity` instance
       
   345         """
       
   346         req = self.req
       
   347         if req is None:
       
   348             raise AssertionError('dont call get_entity with no req on the result set')
       
   349         rowvalues = self.rows[row]
       
   350         eid = rowvalues[col]
       
   351         assert eid is not None
       
   352         # return cached entity if exists. This also avoids potential recursion
       
   353         # XXX should we consider updating a cached entity with possible
       
   354         #     new attributes found in this resultset ?
       
   355         try:
       
   356             if hasattr(req, 'is_super_session'):
       
   357                 # this is a Session object which is not caching entities, so we
       
   358                 # have to use a local cache to avoid recursion pb
       
   359                 if _localcache is None:
       
   360                     _localcache = {}
       
   361                 return _localcache[eid]
       
   362             else:
       
   363                 return req.entity_cache(eid)
       
   364         except KeyError:
       
   365             pass
       
   366         # build entity instance
       
   367         etype = self.description[row][col]
       
   368         entity = self.vreg.etype_class(etype)(req, self, row, col)
       
   369         entity.set_eid(eid)
       
   370         # cache entity
       
   371         if _localcache is not None:
       
   372             _localcache[eid] = entity
       
   373         req.set_entity_cache(entity)
       
   374         eschema = entity.e_schema
       
   375         # try to complete the entity if there are some additional columns
       
   376         if len(rowvalues) > 1:
       
   377             rqlst = self.syntax_tree()
       
   378             if rqlst.TYPE == 'select':
       
   379                 # UNION query, find the subquery from which this entity has been
       
   380                 # found
       
   381                 rqlst = rqlst.locate_subquery(col, etype, self.args)
       
   382             # take care, due to outer join support, we may find None
       
   383             # values for non final relation
       
   384             for i, attr, x in attr_desc_iterator(rqlst, col):
       
   385                 if x == 'subject':
       
   386                     rschema = eschema.subject_relation(attr)
       
   387                     if rschema.is_final():
       
   388                         entity[attr] = rowvalues[i]
       
   389                         continue
       
   390                     tetype = rschema.objects(etype)[0]
       
   391                     card = rschema.rproperty(etype, tetype, 'cardinality')[0]
       
   392                 else:
       
   393                     rschema = eschema.object_relation(attr)
       
   394                     tetype = rschema.subjects(etype)[0]
       
   395                     card = rschema.rproperty(tetype, etype, 'cardinality')[1]
       
   396                 # only keep value if it can't be multivalued
       
   397                 if card in '1?':
       
   398                     if rowvalues[i] is None:
       
   399                         if x == 'subject':
       
   400                             rql = 'Any Y WHERE X %s Y, X eid %s'
       
   401                         else:
       
   402                             rql = 'Any Y WHERE Y %s X, X eid %s'
       
   403                         rrset = ResultSet([], rql % (attr, entity.eid))
       
   404                         req.decorate_rset(rrset)
       
   405                     else:
       
   406                         rrset = self._build_entity(row, i, _localcache).as_rset()
       
   407                     entity.set_related_cache(attr, x, rrset)
       
   408         return entity
       
   409 
       
   410     @cached
       
   411     def syntax_tree(self):
       
   412         """get the syntax tree for the source query. 
       
   413 
       
   414         :rtype: rql.stmts.Statement
       
   415         :return: the RQL syntax tree of the originating query
       
   416         """
       
   417         if self._rqlst:
       
   418             rqlst = self._rqlst.copy()
       
   419             # to avoid transport overhead when pyro is used, the schema has been
       
   420             # unset from the syntax tree
       
   421             rqlst.schema = self.vreg.schema
       
   422             self.vreg.rqlhelper.annotate(rqlst)
       
   423         else:
       
   424             rqlst = self.vreg.parse(self.req, self.rql, self.args)
       
   425         return rqlst
       
   426         
       
   427     @cached
       
   428     def column_types(self, col):
       
   429         """return the list of different types in the column with the given col
       
   430         index default to 0 (ie the first column)
       
   431         
       
   432         :type col: int
       
   433         :param col: the index of the desired column
       
   434 
       
   435         :rtype: list
       
   436         :return: the different entities type found in the column
       
   437         """
       
   438         return frozenset(struc[-1][col] for struc in self.description_struct())
       
   439 
       
   440     @cached
       
   441     def description_struct(self):
       
   442         """return a list describing sequence of results with the same
       
   443         description, e.g. :
       
   444         [[0, 4, ('Bug',)]
       
   445         [[0, 4, ('Bug',), [5, 8, ('Story',)]
       
   446         [[0, 3, ('Project', 'Version',)]]
       
   447         """
       
   448         result = []
       
   449         last = None
       
   450         for i, row in enumerate(self.description):
       
   451             if row != last:
       
   452                 if last is not None:
       
   453                     result[-1][1] = i - 1
       
   454                 result.append( [i, None, row] )
       
   455                 last = row
       
   456         if last is not None:
       
   457             result[-1][1] = i
       
   458         return result
       
   459 
       
   460     @cached
       
   461     def related_entity(self, row, col):
       
   462         """try to get the related entity to extract format information if any"""
       
   463         locate_query_col = col
       
   464         rqlst = self.syntax_tree()
       
   465         etype = self.description[row][col]
       
   466         if self.vreg.schema.eschema(etype).is_final():
       
   467             # final type, find a better (ambiguous) one
       
   468             for i in xrange(len(rqlst.children[0].selection)):
       
   469                 if i == col:
       
   470                     continue
       
   471                 coletype = self.description[row][i]
       
   472                 if coletype is None:
       
   473                     continue
       
   474                 if not self.vreg.schema.eschema(coletype).is_final():
       
   475                     etype = coletype
       
   476                     locate_query_col = i
       
   477                     if len(self.column_types(i)) > 1:
       
   478                         break
       
   479         # UNION query, find the subquery from which this entity has been
       
   480         # found
       
   481         select = rqlst.locate_subquery(locate_query_col, etype, self.args)
       
   482         try:
       
   483             myvar = select.selection[col].variable
       
   484         except AttributeError:
       
   485             # no .selection attribute is available
       
   486             return None, None
       
   487         rel = myvar.main_relation()
       
   488         if rel is not None:
       
   489             index = rel.children[0].variable.selected_index()
       
   490             if index is not None:
       
   491                 return self.get_entity(row, index), rel.r_type
       
   492         return None, None
       
   493 
       
   494     @cached
       
   495     def searched_text(self):
       
   496         """returns the searched text in case of full-text search
       
   497 
       
   498         :return: searched text or `None` if the query is not
       
   499                  a full-text query
       
   500         """
       
   501         rqlst = self.syntax_tree()
       
   502         for rel in rqlst.iget_nodes(nodes.Relation):
       
   503             if rel.r_type == 'has_text':
       
   504                 __, rhs = rel.get_variable_parts()
       
   505                 return rhs.eval(self.args)
       
   506         return None
       
   507         
       
   508 
       
   509 def attr_desc_iterator(rqlst, index=0):
       
   510     """return an iterator on a list of 2-uple (index, attr_relation)
       
   511     localizing attribute relations of the main variable in a result's row
       
   512 
       
   513     :type rqlst: rql.stmts.Select
       
   514     :param rqlst: the RQL syntax tree to describe
       
   515 
       
   516     :return:
       
   517       a generator on (index, relation, target) describing column being
       
   518       attribute of the main variable
       
   519     """
       
   520     main = rqlst.selection[index]
       
   521     for i, term in enumerate(rqlst.selection):
       
   522         if i == index:
       
   523             continue
       
   524         try:
       
   525             # XXX rewritten const
       
   526             var = term.variable
       
   527         except AttributeError:
       
   528             continue
       
   529         #varname = var.name
       
   530         for ref in var.references():
       
   531             rel = ref.relation()
       
   532             if rel is None or rel.is_types_restriction():
       
   533                 continue
       
   534             lhs, rhs = rel.get_variable_parts()
       
   535             if main.is_equivalent(lhs):
       
   536                 if rhs.is_equivalent(term):
       
   537                     yield (i, rel.r_type, 'subject')
       
   538             elif main.is_equivalent(rhs):
       
   539                 if lhs.is_equivalent(term):
       
   540                     yield (i, rel.r_type, 'object')