|
1 """The `ResultSet` class which is returned as result of a rql query |
|
2 |
|
3 :organization: Logilab |
|
4 :copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
|
5 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr |
|
6 """ |
|
7 __docformat__ = "restructuredtext en" |
|
8 |
|
9 from logilab.common.decorators import cached, clear_cache, copy_cache |
|
10 |
|
11 from rql import nodes |
|
12 |
|
13 from cubicweb import NotAnEntity |
|
14 |
|
15 |
|
class ResultSet(object):
    """A result set wraps a RQL query result. This object implements a partial
    list protocol to allow direct use as a list of result rows.

    :type rowcount: int
    :ivar rowcount: number of rows in the result

    :type rows: list
    :ivar rows: list of rows of result

    :type description: list
    :ivar description:
      result's description, using the same structure as the result itself

    :type rql: str or unicode
    :ivar rql: the original RQL query string
    """

    def __init__(self, results, rql, args=None, description=(), cachekey=None,
                 rqlst=None):
        """initialize the result set from raw query results

        :param results: the rows returned by the query (may be None or empty)
        :param rql: the original RQL query string
        :param args: the arguments given with the query, if any
        :param description:
          entity types for each cell, with the same shape as `results`
        :param cachekey: stored as is on the result set
        :param rqlst:
          the already parsed syntax tree of the query, if available. Its
          `schema` attribute is reset to None.
        """
        self.rows = results
        # `results` may be None or empty: there is no row to count then
        if results:
            self.rowcount = len(results)
        else:
            self.rowcount = 0
        # original query and arguments
        self.rql = rql
        self.args = args
        self.cachekey = cachekey
        # entity types for each cell (same shape as rows)
        # maybe discarded if specified when the query has been executed
        self.description = description
        # parsed syntax tree
        if rqlst is not None:
            rqlst.schema = None # reset schema in case of pyro transfer
        self._rqlst = rqlst
        # set to (limit, offset) when a result set is limited using the
        # .limit method
        self.limited = None
        # set by the cursor which returned this resultset
        self.vreg = None
        self.req = None

    def __str__(self):
        """short description: the query and the number of rows"""
        if not self.rows:
            return '<empty resultset %s>' % self.rql
        return '<resultset %s (%s rows)>' % (self.rql, len(self.rows))

    def __repr__(self):
        """detailed description: the query followed by every row (and its
        description when available), one per line
        """
        if not self.rows:
            return '<empty resultset for %s>' % self.rql
        if not self.description:
            return '<resultset %s: %s>' % (
                self.rql, '\n'.join(str(r) for r in self.rows))
        return '<resultset %s: %s>' % (
            self.rql,
            '\n'.join('%s (%s)' % (r, d)
                      for r, d in zip(self.rows, self.description)))

    @cached
    def possible_actions(self):
        """return what the registry's `possible_vobjects` gives for the
        'actions' registry, this request and this result set (cached)
        """
        return self.vreg.possible_vobjects('actions', self.req, self)

    def __len__(self):
        """returns the result set's size"""
        return self.rowcount

    def __nonzero__(self):
        """a result set is true when it has at least one row"""
        return self.rowcount

    def __getitem__(self, i):
        """returns the ith element of the result set"""
        return self.rows[i]

    def __getslice__(self, i, j):
        """returns slice [i:j] of the result set"""
        return self.rows[i:j]

    def __iter__(self):
        """Returns an iterator over rows"""
        return iter(self.rows)

    def __add__(self, rset):
        """return a new decorated result set holding rows and descriptions of
        this result set followed by those of `rset`
        """
        # XXX buggy implementation (.rql and .args attributes at least much
        # probably differ)
        # at least rql could be fixed now that we have union and sub-queries
        # but I tend to think that since we have that, we should not need this
        # method anymore (syt)
        rset = ResultSet(self.rows + rset.rows, self.rql, self.args,
                         self.description + rset.description)
        return self.req.decorate_rset(rset)

    def _prepare_copy(self, rows, descr):
        """return a new result set for the same query and arguments but with
        the given rows and description, decorated by the request
        """
        rset = ResultSet(rows, self.rql, self.args, descr)
        return self.req.decorate_rset(rset)

    def transformed_rset(self, transformcb):
        """transform the result set using a given callback

        :type transformcb: callable(row, desc)
        :param transformcb:
          a callable which should take a row and its type description as
          parameters, and return the transformed row and type description.
          Rows for which the returned description is false (e.g. None) are
          skipped.

        :rtype: `ResultSet`
        """
        rows, descr = [], []
        # the copy shares the `rows` and `descr` lists filled below
        rset = self._prepare_copy(rows, descr)
        for row, desc in zip(self.rows, self.description):
            nrow, ndesc = transformcb(row, desc)
            if ndesc: # transformcb returns None for ndesc to skip that row
                rows.append(nrow)
                descr.append(ndesc)
        rset.rowcount = len(rows)
        return rset

    def filtered_rset(self, filtercb, col=0):
        """filter the result set according to a given filtercb

        :type filtercb: callable(entity)
        :param filtercb:
          a callable which should take an entity as argument and return
          False if it should be skipped, else True

        :type col: int
        :param col: the column index

        :rtype: `ResultSet`
        """
        rows, descr = [], []
        # the copy shares the `rows` and `descr` lists filled below
        rset = self._prepare_copy(rows, descr)
        for i in xrange(len(self)):
            if not filtercb(self.get_entity(i, col)):
                continue
            rows.append(self.rows[i])
            descr.append(self.description[i])
        rset.rowcount = len(rows)
        return rset

    def sorted_rset(self, keyfunc, reverse=False, col=0):
        """sorts the result set according to a given keyfunc

        :type keyfunc: callable(entity)
        :param keyfunc:
          a callable which should take an entity (or a whole row when
          col = -1) as argument and return the value used to compare and sort

        :type reverse: bool
        :param reverse: if the result should be reversed

        :type col: int
        :param col:
          the column index. if col = -1, the whole row are used. Else rows
          holding None in that column are left out of the sorted result set.

        :rtype: `ResultSet`
        """
        rows, descr = [], []
        # the copy shares the `rows` and `descr` lists filled below
        rset = self._prepare_copy(rows, descr)
        if col >= 0:
            # keep the *real* row index next to each entity: rows holding None
            # in this column have no entity, so numbering entities on their own
            # would shift indexes and pick the wrong rows below
            candidates = [(i, self.get_entity(i, col))
                          for i in xrange(len(self))
                          if self.rows[i][col] is not None]
        else:
            candidates = list(enumerate(self))
        candidates.sort(key=lambda item: keyfunc(item[1]), reverse=reverse)
        for index, _ in candidates:
            rows.append(self.rows[index])
            descr.append(self.description[index])
        rset.rowcount = len(rows)
        return rset

    def split_rset(self, keyfunc=None, col=0, return_dict=False):
        """Splits the result set in multiple result set according to a given key

        :type keyfunc: callable(entity or FinalType)
        :param keyfunc:
          a callable which should take a value of the rset in argument and
          return the value used to group the value. If not define, raw value
          of the specified columns is used.

        :type col: int
        :param col: the column index. if col = -1, the whole row are used

        :type return_dict: Boolean
        :param return_dict: If true, the function return a mapping
          (key -> rset) instead of a list of rset

        :rtype: List of `ResultSet` or mapping of `ResultSet`
        """
        result = []    # result sets, in order of first appearance of their key
        mapping = {}   # key -> result set
        for idx, line in enumerate(self):
            if col >= 0:
                try:
                    key = self.get_entity(idx, col)
                except NotAnEntity:
                    # not an entity column, group on the raw value
                    key = line[col]
            else:
                key = line
            if keyfunc is not None:
                key = keyfunc(key)
            if key not in mapping:
                rows, descr = [], []
                rset = self._prepare_copy(rows, descr)
                mapping[key] = rset
                result.append(rset)
            else:
                rset = mapping[key]
            rset.rows.append(self.rows[idx])
            rset.description.append(self.description[idx])
        # rows have been appended after creation, fix row counts
        for rset in result:
            rset.rowcount = len(rset.rows)
        if return_dict:
            return mapping
        return result

    def limit(self, limit, offset=0, inplace=False):
        """limit the result set to the given number of rows optionally starting
        from an index different than 0

        :type limit: int
        :param limit: the maximum number of results

        :type offset: int
        :param offset: the offset index

        :type inplace: bool
        :param inplace:
          if true, the result set is modified in place, else a new result set
          is returned and the original is left unmodified

        :rtype: `ResultSet`
        :return:
          the limited result set, whose `limited` attribute is set to
          (limit, offset)
        """
        stop = limit + offset
        rows = self.rows[offset:stop]
        descr = self.description[offset:stop]
        if inplace:
            rset = self
            rset.rows, rset.description = rows, descr
            rset.rowcount = len(rows)
            clear_cache(rset, 'description_struct')
            if offset:
                # row numbers changed, cached entities can't be reused
                clear_cache(rset, 'get_entity')
            # we also have to fix/remove from the request entity cache entities
            # which get a wrong rset reference by this limit call
            for entity in self.req.cached_entities():
                if entity.rset is self:
                    if offset <= entity.row < stop:
                        entity.row = entity.row - offset
                    else:
                        self.req.drop_entity_cache(entity.eid)
        else:
            rset = self._prepare_copy(rows, descr)
            if not offset:
                # can copy built entity caches
                copy_cache(rset, 'get_entity', self)
        rset.limited = (limit, offset)
        return rset

    def printable_rql(self, encoded=False):
        """return the result set's origin rql as a string, with arguments
        substituted

        :type encoded: bool
        :param encoded:
          if true, return a string encoded using the request's encoding, else
          return an unicode string
        """
        encoding = self.req.encoding
        rqlstr = self.syntax_tree().as_string(encoding, self.args)
        # sounds like we get encoded or unicode string due to a bug in as_string
        if not encoded:
            if isinstance(rqlstr, unicode):
                return rqlstr
            return unicode(rqlstr, encoding)
        if isinstance(rqlstr, unicode):
            return rqlstr.encode(encoding)
        return rqlstr

    # client helper methods ###################################################

    def entities(self, col=0):
        """iter on entities with eid in the `col` column of the result set

        Rows holding None in that column are skipped.
        """
        for i in xrange(len(self)):
            # may have None values in case of outer join (or aggregat on eid
            # hacks)
            if self.rows[i][col] is not None:
                yield self.get_entity(i, col)

    @cached
    def get_entity(self, row, col=None):
        """special method for query retrieving a single entity, returns a
        partially initialized Entity instance.

        WARNING: due to the cache wrapping this function, you should NEVER
        give row as a named parameter (i.e. rset.get_entity(0, 1) is OK but
        rset.get_entity(row=0, col=1) isn't)

        :type row,col: int, int
        :param row,col:
          row and col numbers localizing the entity among the result's table.
          Omitting `col` is deprecated (0 is used then).

        :raise NotAnEntity:
          if the type found in the description at this position is unknown to
          the schema or is a final type

        :return: the partially initialized `Entity` instance
        """
        if col is None:
            from warnings import warn
            msg = 'col parameter will become mandatory in future version'
            warn(msg, DeprecationWarning, stacklevel=3)
            col = 0
        etype = self.description[row][col]
        try:
            eschema = self.vreg.schema.eschema(etype)
            if eschema.is_final():
                raise NotAnEntity(etype)
        except KeyError:
            raise NotAnEntity(etype)
        return self._build_entity(row, col)

    def _build_entity(self, row, col, _localcache=None):
        """internal method to get a single entity, returns a
        partially initialized Entity instance.

        partially means that only attributes selected in the RQL
        query will be directly assigned to the entity.

        :type row,col: int, int
        :param row,col:
          row and col numbers localizing the entity among the result's table

        :param _localcache:
          eid -> entity dictionary shared between recursive calls, only used
          when the request has an `is_super_session` attribute

        :return: the partially initialized `Entity` instance
        """
        req = self.req
        if req is None:
            raise AssertionError('dont call get_entity with no req on the result set')
        rowvalues = self.rows[row]
        eid = rowvalues[col]
        assert eid is not None
        # return cached entity if exists. This also avoids potential recursion
        # XXX should we consider updating a cached entity with possible
        #     new attributes found in this resultset ?
        try:
            if hasattr(req, 'is_super_session'):
                # this is a Session object which is not caching entities, so we
                # have to use a local cache to avoid recursion pb
                if _localcache is None:
                    _localcache = {}
                return _localcache[eid]
            else:
                return req.entity_cache(eid)
        except KeyError:
            pass
        # build entity instance
        etype = self.description[row][col]
        entity = self.vreg.etype_class(etype)(req, self, row, col)
        entity.set_eid(eid)
        # cache entity
        if _localcache is not None:
            _localcache[eid] = entity
        req.set_entity_cache(entity)
        eschema = entity.e_schema
        # try to complete the entity if there are some additional columns
        if len(rowvalues) > 1:
            rqlst = self.syntax_tree()
            if rqlst.TYPE == 'select':
                # UNION query, find the subquery from which this entity has been
                # found
                rqlst = rqlst.locate_subquery(col, etype, self.args)
            # take care, due to outer join support, we may find None
            # values for non final relation
            for i, attr, x in attr_desc_iterator(rqlst, col):
                if x == 'subject':
                    rschema = eschema.subject_relation(attr)
                    if rschema.is_final():
                        # plain attribute, store its value on the entity
                        entity[attr] = rowvalues[i]
                        continue
                    tetype = rschema.objects(etype)[0]
                    card = rschema.rproperty(etype, tetype, 'cardinality')[0]
                else:
                    rschema = eschema.object_relation(attr)
                    tetype = rschema.subjects(etype)[0]
                    card = rschema.rproperty(tetype, etype, 'cardinality')[1]
                # only keep value if it can't be multivalued
                if card in '1?':
                    if rowvalues[i] is None:
                        # no related entity, cache an empty result set
                        if x == 'subject':
                            rql = 'Any Y WHERE X %s Y, X eid %s'
                        else:
                            rql = 'Any Y WHERE Y %s X, X eid %s'
                        rrset = ResultSet([], rql % (attr, entity.eid))
                        req.decorate_rset(rrset)
                    else:
                        rrset = self._build_entity(row, i, _localcache).as_rset()
                    entity.set_related_cache(attr, x, rrset)
        return entity

    @cached
    def syntax_tree(self):
        """get the syntax tree for the source query.

        :rtype: rql.stmts.Statement
        :return: the RQL syntax tree of the originating query
        """
        if self._rqlst:
            rqlst = self._rqlst.copy()
            # to avoid transport overhead when pyro is used, the schema has been
            # unset from the syntax tree
            rqlst.schema = self.vreg.schema
            self.vreg.rqlhelper.annotate(rqlst)
        else:
            rqlst = self.vreg.parse(self.req, self.rql, self.args)
        return rqlst

    @cached
    def column_types(self, col):
        """return the set of different types in the column with the given col
        index

        :type col: int
        :param col: the index of the desired column

        :rtype: frozenset
        :return: the different entities type found in the column
        """
        return frozenset(struc[-1][col] for struc in self.description_struct())

    @cached
    def description_struct(self):
        """return a list describing sequences of consecutive results with the
        same description, each item being
        [first row index, last row index, description], e.g. :
        [[0, 4, ('Bug',)]]
        [[0, 4, ('Bug',)], [5, 8, ('Story',)]]
        [[0, 3, ('Project', 'Version',)]]
        """
        result = []
        last = None
        for i, row in enumerate(self.description):
            if row != last:
                if last is not None:
                    # close the previous sequence on the previous row
                    result[-1][1] = i - 1
                result.append([i, None, row])
                last = row
        if last is not None:
            # close the last sequence on the last row
            result[-1][1] = i
        return result

    @cached
    def related_entity(self, row, col):
        """try to get the related entity to extract format information if any

        :type row,col: int, int
        :param row,col: row and col numbers of the cell in the result's table

        :return:
          an (entity, relation type) tuple, or (None, None) if no related
          entity can be found
        """
        locate_query_col = col
        rqlst = self.syntax_tree()
        etype = self.description[row][col]
        if self.vreg.schema.eschema(etype).is_final():
            # final type, find a better (ambiguous) one
            for i in xrange(len(rqlst.children[0].selection)):
                if i == col:
                    continue
                coletype = self.description[row][i]
                if coletype is None:
                    continue
                if not self.vreg.schema.eschema(coletype).is_final():
                    etype = coletype
                    locate_query_col = i
                    if len(self.column_types(i)) > 1:
                        break
        # UNION query, find the subquery from which this entity has been
        # found
        select = rqlst.locate_subquery(locate_query_col, etype, self.args)
        try:
            myvar = select.selection[col].variable
        except AttributeError:
            # no .selection attribute is available
            return None, None
        rel = myvar.main_relation()
        if rel is not None:
            index = rel.children[0].variable.selected_index()
            if index is not None:
                return self.get_entity(row, index), rel.r_type
        return None, None

    @cached
    def searched_text(self):
        """returns the searched text in case of full-text search

        :return: searched text or `None` if the query is not
                 a full-text query
        """
        rqlst = self.syntax_tree()
        for rel in rqlst.iget_nodes(nodes.Relation):
            if rel.r_type == 'has_text':
                __, rhs = rel.get_variable_parts()
                return rhs.eval(self.args)
        return None
|
507 |
|
508 |
|
def attr_desc_iterator(rqlst, index=0):
    """return an iterator on 3-uple (index, relation, target) localizing
    attribute relations of the main variable in a result's row

    :type rqlst: rql.stmts.Select
    :param rqlst: the RQL syntax tree to describe

    :type index: int
    :param index:
      position in the selection of the main term (the first one by default)

    :return:
      a generator on (index, relation, target) describing column being
      attribute of the main variable. `index` is the column number,
      `relation` the relation type and `target` is 'subject' when the main
      term is the left hand side of the relation, 'object' when it's the
      right hand side.
    """
    main = rqlst.selection[index]
    for i, term in enumerate(rqlst.selection):
        if i == index:
            continue
        try:
            # XXX rewritten const
            var = term.variable
        except AttributeError:
            # this selected term has no variable, nothing to describe
            continue
        for ref in var.references():
            rel = ref.relation()
            if rel is None or rel.is_types_restriction():
                continue
            lhs, rhs = rel.get_variable_parts()
            if main.is_equivalent(lhs):
                if rhs.is_equivalent(term):
                    yield (i, rel.r_type, 'subject')
            elif main.is_equivalent(rhs):
                if lhs.is_equivalent(term):
                    yield (i, rel.r_type, 'object')