1 """Helper classes to execute RQL queries on a set of sources, performing |
1 """Helper classes to execute RQL queries on a set of sources, performing |
2 security checking and data aggregation. |
2 security checking and data aggregation. |
3 |
3 |
4 :organization: Logilab |
4 :organization: Logilab |
5 :copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
5 :copyright: 2001-2009 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
6 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr |
6 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr |
7 """ |
7 """ |
8 __docformat__ = "restructuredtext en" |
8 __docformat__ = "restructuredtext en" |
9 |
9 |
10 from itertools import repeat |
10 from itertools import repeat |
112 if not vref.name in selected: |
112 if not vref.name in selected: |
113 select.append_selected(vref) |
113 select.append_selected(vref) |
114 colalias = newselect.get_variable(vref.name, len(aliases)) |
114 colalias = newselect.get_variable(vref.name, len(aliases)) |
115 aliases.append(VariableRef(colalias)) |
115 aliases.append(VariableRef(colalias)) |
116 selected.add(vref.name) |
116 selected.add(vref.name) |
117 |
117 |
118 # Plans ####################################################################### |
118 # Plans ####################################################################### |
119 |
119 |
120 class ExecutionPlan(object): |
120 class ExecutionPlan(object): |
121 """the execution model of a rql query, composed of querier steps""" |
121 """the execution model of a rql query, composed of querier steps""" |
122 |
122 |
123 def __init__(self, querier, rqlst, args, session): |
123 def __init__(self, querier, rqlst, args, session): |
124 # original rql syntax tree |
124 # original rql syntax tree |
125 self.rqlst = rqlst |
125 self.rqlst = rqlst |
126 self.args = args or {} |
126 self.args = args or {} |
127 # session executing the query |
127 # session executing the query |
135 # various resource accesors |
135 # various resource accesors |
136 self.querier = querier |
136 self.querier = querier |
137 self.schema = querier.schema |
137 self.schema = querier.schema |
138 self.rqlhelper = querier._rqlhelper |
138 self.rqlhelper = querier._rqlhelper |
139 self.sqlannotate = querier.sqlgen_annotate |
139 self.sqlannotate = querier.sqlgen_annotate |
140 |
140 |
def annotate_rqlst(self):
    """Annotate the plan's syntax tree unless this was already done.

    Reads the ``annotated`` flag of ``self.rqlst``; when it is false the
    tree is handed to ``self.rqlhelper.annotate``. Nothing is returned.
    """
    if not self.rqlst.annotated:
        self.rqlhelper.annotate(self.rqlst)
144 |
144 |
def add_step(self, step):
    """Add a step to the plan.

    :param step: the step object to append at the end of ``self.steps``
    """
    self.steps.append(step)
148 |
148 |
def clean(self):
    """Remove temporary tables.

    Delegates to ``self.syssource.clean_temp_data``, giving it the plan's
    session and its ``temp_tables`` registry.
    """
    self.syssource.clean_temp_data(self.session, self.temp_tables)
152 |
152 |
def sqlexec(self, sql, args=None):
    """Run a SQL statement through the system source.

    :param sql: the SQL string, forwarded untouched
    :param args: optional arguments for the statement (``None`` by default)
    :return: whatever ``self.syssource.sqlexec`` returns for this session
    """
    return self.syssource.sqlexec(self.session, sql, args)
155 |
155 |
def execute(self):
    """Execute a plan and return resulting rows.

    Every step of ``self.steps`` is executed in order and the value
    returned by the last one is the value of the whole plan.
    ``self.clean()`` is always called afterwards, even when a step raises.

    NOTE(review): when ``self.steps`` is empty, ``result`` is never bound
    and the ``return`` raises ``UnboundLocalError`` (after ``clean()`` has
    run).
    """
    try:
        for step in self.steps:
            result = step.execute()
        # the latest executed step contains the full query result
        return result
    finally:
        # cleanup must happen whether the steps succeeded or not
        self.clean()
165 |
165 |
166 def init_temp_table(self, table, selected, sol): |
166 def init_temp_table(self, table, selected, sol): |
167 """initialize sql schema and variable map for a temporary table which |
167 """initialize sql schema and variable map for a temporary table which |
168 will be used to store result for the given rqlst |
168 will be used to store result for the given rqlst |
169 """ |
169 """ |
170 try: |
170 try: |
173 except KeyError: |
173 except KeyError: |
174 sqlschema, outputmap = self.syssource.temp_table_def(selected, sol, |
174 sqlschema, outputmap = self.syssource.temp_table_def(selected, sol, |
175 table) |
175 table) |
176 self.temp_tables[table] = [outputmap, sqlschema, False] |
176 self.temp_tables[table] = [outputmap, sqlschema, False] |
177 return outputmap |
177 return outputmap |
178 |
178 |
def create_temp_table(self, table):
    """Create a temporary table to store result for the given rqlst.

    ``self.temp_tables[table]`` is a list whose item 1 is the SQL schema
    of the table and whose last item is a flag telling whether the table
    has already been created. The table is created through the system
    source only when that flag is false, then the flag is set to ``True``,
    so calling this method again for the same table does nothing.

    :param table: key of the table in ``self.temp_tables``
    :raise KeyError: if `table` has no entry in ``self.temp_tables``
    """
    if not self.temp_tables[table][-1]:
        sqlschema = self.temp_tables[table][1]
        self.syssource.create_temp_table(self.session, table, sqlschema)
        self.temp_tables[table][-1] = True
185 |
185 |
186 def preprocess(self, union, security=True): |
186 def preprocess(self, union, security=True): |
187 """insert security when necessary then annotate rql st for sql generation |
187 """insert security when necessary then annotate rql st for sql generation |
188 |
188 |
189 return rqlst to actually execute |
189 return rqlst to actually execute |
190 """ |
190 """ |
191 #if server.DEBUG: |
191 #if server.DEBUG: |
192 # print '------- preprocessing', union.as_string('utf8') |
192 # print '------- preprocessing', union.as_string('utf8') |
193 noinvariant = set() |
193 noinvariant = set() |
277 |
277 |
278 solutions where a variable has a type which the user can't definitly read |
278 solutions where a variable has a type which the user can't definitly read |
279 are removed, else if the user may read it (eg if an rql expression is |
279 are removed, else if the user may read it (eg if an rql expression is |
280 defined for the "read" permission of the related type), the local checks |
280 defined for the "read" permission of the related type), the local checks |
281 dict for the solution is updated |
281 dict for the solution is updated |
282 |
282 |
283 return a dict with entries for each different local check necessary, |
283 return a dict with entries for each different local check necessary, |
284 with associated solutions as value. A local check is defined by a list |
284 with associated solutions as value. A local check is defined by a list |
285 of 2-uple, with variable name as first item and the necessary rql |
285 of 2-uple, with variable name as first item and the necessary rql |
286 expression as second item for each variable which has to be checked. |
286 expression as second item for each variable which has to be checked. |
287 So solutions which don't require local checks will be associated to |
287 So solutions which don't require local checks will be associated to |
344 sol[newvarname] = nvartype |
344 sol[newvarname] = nvartype |
345 select.clean_solutions(solutions) |
345 select.clean_solutions(solutions) |
346 self.rqlhelper.annotate(rqlst) |
346 self.rqlhelper.annotate(rqlst) |
347 self.preprocess(rqlst, security=False) |
347 self.preprocess(rqlst, security=False) |
348 return rqlst |
348 return rqlst |
349 |
349 |
350 class InsertPlan(ExecutionPlan): |
350 class InsertPlan(ExecutionPlan): |
351 """an execution model specific to the INSERT rql query |
351 """an execution model specific to the INSERT rql query |
352 """ |
352 """ |
353 |
353 |
354 def __init__(self, querier, rqlst, args, session): |
354 def __init__(self, querier, rqlst, args, session): |
355 ExecutionPlan.__init__(self, querier, rqlst, args, session) |
355 ExecutionPlan.__init__(self, querier, rqlst, args, session) |
356 # save originaly selected variable, we may modify this |
356 # save originaly selected variable, we may modify this |
357 # dictionary for substitution (query parameters) |
357 # dictionary for substitution (query parameters) |
358 self.selected = rqlst.selection |
358 self.selected = rqlst.selection |
385 if isinstance(rhs, Constant) and not rhs.uid: |
385 if isinstance(rhs, Constant) and not rhs.uid: |
386 # add constant values to entity def |
386 # add constant values to entity def |
387 value = rhs.eval(self.args) |
387 value = rhs.eval(self.args) |
388 eschema = edef.e_schema |
388 eschema = edef.e_schema |
389 attrtype = eschema.subject_relation(rtype).objects(eschema)[0] |
389 attrtype = eschema.subject_relation(rtype).objects(eschema)[0] |
390 if attrtype == 'Password' and isinstance(value, unicode): |
390 if attrtype == 'Password' and isinstance(value, unicode): |
391 value = value.encode('UTF8') |
391 value = value.encode('UTF8') |
392 edef[rtype] = value |
392 edef[rtype] = value |
393 elif to_build.has_key(str(rhs)): |
393 elif to_build.has_key(str(rhs)): |
394 # create a relation between two newly created variables |
394 # create a relation between two newly created variables |
395 self.add_relation_def((edef, rtype, to_build[rhs.name])) |
395 self.add_relation_def((edef, rtype, to_build[rhs.name])) |
396 else: |
396 else: |
397 to_select.setdefault(edef, []).append( (rtype, rhs, 0) ) |
397 to_select.setdefault(edef, []).append( (rtype, rhs, 0) ) |
398 return to_select |
398 return to_select |
399 |
399 |
400 |
400 |
def add_entity_def(self, edef):
    """Add an entity definition to build.

    The definition gets a fresh, empty ``querier_pending_relations`` dict
    and is appended to the last row of ``self.e_defs``.

    :param edef: the entity definition object to register
    """
    edef.querier_pending_relations = {}
    self.e_defs[-1].append(edef)
405 |
405 |
def add_relation_def(self, rdef):
    """Add a relation definition to build.

    :param rdef: an indexable relation definition; item 0 is used as the
        subject and item 2 as the object (the unpacking done elsewhere in
        this file suggests a ``(subject, rtype, object)`` 3-uple)

    The definition is always appended to ``self.r_defs``. It is also
    recorded in ``self._r_subj_index`` / ``self._r_obj_index`` under its
    subject / object, except for ends which are ``int`` instances.
    """
    self.r_defs.append(rdef)
    # int ends are not indexed (presumably eids of existing entities --
    # TODO confirm)
    if not isinstance(rdef[0], int):
        self._r_subj_index.setdefault(rdef[0], []).append(rdef)
    if not isinstance(rdef[2], int):
        self._r_obj_index.setdefault(rdef[2], []).append(rdef)
413 |
413 |
414 def substitute_entity_def(self, edef, edefs): |
414 def substitute_entity_def(self, edef, edefs): |
415 """substitute an incomplete entity definition by a list of complete |
415 """substitute an incomplete entity definition by a list of complete |
416 equivalents |
416 equivalents |
417 |
417 |
418 e.g. on queries such as :: |
418 e.g. on queries such as :: |
419 INSERT Personne X, Societe Y: X nom N, Y nom 'toto', X travaille Y |
419 INSERT Personne X, Societe Y: X nom N, Y nom 'toto', X travaille Y |
420 WHERE U login 'admin', U login N |
420 WHERE U login 'admin', U login N |
421 |
421 |
422 X will be inserted as many times as U exists, and so the X travaille Y |
422 X will be inserted as many times as U exists, and so the X travaille Y |
453 result = [] |
453 result = [] |
454 for exp_rdef in expanded: |
454 for exp_rdef in expanded: |
455 for edef in edefs: |
455 for edef in edefs: |
456 result.append( (exp_rdef[0], exp_rdef[1], edef) ) |
456 result.append( (exp_rdef[0], exp_rdef[1], edef) ) |
457 self._expanded_r_defs[rdef] = result |
457 self._expanded_r_defs[rdef] = result |
458 |
458 |
459 def _expanded(self, rdef): |
459 def _expanded(self, rdef): |
460 """return expanded value for the given relation definition""" |
460 """return expanded value for the given relation definition""" |
461 try: |
461 try: |
462 return self._expanded_r_defs[rdef] |
462 return self._expanded_r_defs[rdef] |
463 except KeyError: |
463 except KeyError: |
464 self.r_defs.remove(rdef) |
464 self.r_defs.remove(rdef) |
465 return [rdef] |
465 return [rdef] |
466 |
466 |
def relation_defs(self):
    """Iterate over the relation definitions to insert.

    This is a generator: it first yields every definition found in the
    lists stored as values of ``self._expanded_r_defs``, then every
    definition of ``self.r_defs``.
    """
    for rdefs in self._expanded_r_defs.values():
        for rdef in rdefs:
            yield rdef
    for rdef in self.r_defs:
        yield rdef
474 |
474 |
475 def insert_entity_defs(self): |
475 def insert_entity_defs(self): |
476 """return eids of inserted entities in a suitable form for the resulting |
476 """return eids of inserted entities in a suitable form for the resulting |
477 result set, e.g.: |
477 result set, e.g.: |
478 |
478 |
479 e.g. on queries such as :: |
479 e.g. on queries such as :: |
480 INSERT Personne X, Societe Y: X nom N, Y nom 'toto', X travaille Y |
480 INSERT Personne X, Societe Y: X nom N, Y nom 'toto', X travaille Y |
481 WHERE U login 'admin', U login N |
481 WHERE U login 'admin', U login N |
482 |
482 |
483 if there is two entities matching U, the result set will look like |
483 if there is two entities matching U, the result set will look like |
488 results = [] |
488 results = [] |
489 for row in self.e_defs: |
489 for row in self.e_defs: |
490 results.append([repo.glob_add_entity(session, edef) |
490 results.append([repo.glob_add_entity(session, edef) |
491 for edef in row]) |
491 for edef in row]) |
492 return results |
492 return results |
493 |
493 |
494 def insert_relation_defs(self): |
494 def insert_relation_defs(self): |
495 session = self.session |
495 session = self.session |
496 repo = session.repo |
496 repo = session.repo |
497 for subj, rtype, obj in self.relation_defs(): |
497 for subj, rtype, obj in self.relation_defs(): |
498 # if a string is given into args instead of an int, we get it here |
498 # if a string is given into args instead of an int, we get it here |
512 repo.glob_add_relation(session, subj, rtype, obj) |
512 repo.glob_add_relation(session, subj, rtype, obj) |
513 |
513 |
514 |
514 |
515 class QuerierHelper(object): |
515 class QuerierHelper(object): |
516 """helper class to execute rql queries, putting all things together""" |
516 """helper class to execute rql queries, putting all things together""" |
517 |
517 |
518 def __init__(self, repo, schema): |
518 def __init__(self, repo, schema): |
519 # system info helper |
519 # system info helper |
520 self._repo = repo |
520 self._repo = repo |
521 # application schema |
521 # application schema |
522 self.set_schema(schema) |
522 self.set_schema(schema) |
523 |
523 |
def set_schema(self, schema):
    """(Re)build every schema dependent helper of the querier.

    :param schema: the application schema to use from now on

    Sets ``self.schema``, a new rql helper, a new rql cache (with its
    hit / miss counters reset to 0), the planner and the sql generation
    annotator.
    """
    self.schema = schema
    # rql parsing / analysing helper
    self._rqlhelper = RQLHelper(schema, special_relations={'eid': 'uid',
                                                           'has_text': 'fti'})
    self._rql_cache = Cache(self._repo.config['rql-cache-size'])
    self.cache_hit, self.cache_miss = 0, 0
    # rql planner
    # note: don't use repo.sources, may not be built yet, and also "admin"
    #       isn't an actual source
    if len([uri for uri in self._repo.config.sources() if uri != 'admin']) < 2:
        # fewer than two configured sources besides "admin"
        from cubicweb.server.ssplanner import SSPlanner
        self._planner = SSPlanner(schema, self._rqlhelper)
    else:
        from cubicweb.server.msplanner import MSPlanner
        self._planner = MSPlanner(schema, self._rqlhelper)
    # sql generation annotator
    self.sqlgen_annotate = SQLGenAnnotator(schema).annotate
542 |
542 |
543 def parse(self, rql, annotate=False): |
543 def parse(self, rql, annotate=False): |
544 """return a rql syntax tree for the given rql""" |
544 """return a rql syntax tree for the given rql""" |
545 try: |
545 try: |
546 return self._rqlhelper.parse(unicode(rql), annotate=annotate) |
546 return self._rqlhelper.parse(unicode(rql), annotate=annotate) |
547 except UnicodeError: |
547 except UnicodeError: |
def plan_factory(self, rqlst, args, session):
    """Create the execution plan suited to the given RQL syntax tree.

    :param rqlst: the syntax tree; its ``TYPE`` attribute selects the plan
    :param args: the query arguments, forwarded to the plan
    :param session: the session executing the query, forwarded to the plan
    :return: an `InsertPlan` when ``rqlst.TYPE`` is ``'insert'``, else an
        `ExecutionPlan`
    """
    if rqlst.TYPE == 'insert':
        return InsertPlan(self, rqlst, args, session)
    return ExecutionPlan(self, rqlst, args, session)
562 |
562 |
563 def execute(self, session, rql, args=None, eid_key=None, build_descr=True): |
563 def execute(self, session, rql, args=None, eid_key=None, build_descr=True): |
564 """execute a rql query, return resulting rows and their description in |
564 """execute a rql query, return resulting rows and their description in |
565 a `ResultSet` object |
565 a `ResultSet` object |
566 |
566 |
567 * `rql` should be an unicode string or a plain ascii string |
567 * `rql` should be an unicode string or a plain ascii string |
576 and resolve some ambiguity in the possible solutions infered for each |
576 and resolve some ambiguity in the possible solutions infered for each |
577 variable in the query. |
577 variable in the query. |
578 |
578 |
579 on INSERT queries, there will be on row with the eid of each inserted |
579 on INSERT queries, there will be on row with the eid of each inserted |
580 entity |
580 entity |
581 |
581 |
582 result for DELETE and SET queries is undefined yet |
582 result for DELETE and SET queries is undefined yet |
583 |
583 |
584 to maximize the rql parsing/analyzing cache performance, you should |
584 to maximize the rql parsing/analyzing cache performance, you should |
585 always use substitute arguments in queries (eg avoid query such as |
585 always use substitute arguments in queries (eg avoid query such as |
586 'Any X WHERE X eid 123'!) |
586 'Any X WHERE X eid 123'!) |