server/querier.py
branch tls-sprint
changeset 1802 d628defebc17
parent 1133 8a409ea0c9ec
child 1954 9b20f3504af8
equal deleted inserted replaced
1801:672acc730ce5 1802:d628defebc17
     1 """Helper classes to execute RQL queries on a set of sources, performing
     1 """Helper classes to execute RQL queries on a set of sources, performing
     2 security checking and data aggregation.
     2 security checking and data aggregation.
     3 
     3 
     4 :organization: Logilab
     4 :organization: Logilab
     5 :copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
     5 :copyright: 2001-2009 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
     6 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
     6 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
     7 """
     7 """
     8 __docformat__ = "restructuredtext en"
     8 __docformat__ = "restructuredtext en"
     9 
     9 
    10 from itertools import repeat
    10 from itertools import repeat
    82                 ex.var = varname
    82                 ex.var = varname
    83                 raise ex
    83                 raise ex
    84             #assert len(erqlexprs) == 1
    84             #assert len(erqlexprs) == 1
    85             localchecks[varname] = tuple(erqlexprs)
    85             localchecks[varname] = tuple(erqlexprs)
    86     return localchecks
    86     return localchecks
    87                     
    87 
    88 def noinvariant_vars(restricted, select, nbtrees):
    88 def noinvariant_vars(restricted, select, nbtrees):
    89     # a variable can actually be invariant if it has not been restricted for
    89     # a variable can actually be invariant if it has not been restricted for
    90     # security reason or if security assertion hasn't modified the possible
    90     # security reason or if security assertion hasn't modified the possible
    91     # solutions for the query
    91     # solutions for the query
    92     if nbtrees != 1:
    92     if nbtrees != 1:
   112             if not vref.name in selected:
   112             if not vref.name in selected:
   113                 select.append_selected(vref)
   113                 select.append_selected(vref)
   114                 colalias = newselect.get_variable(vref.name, len(aliases))
   114                 colalias = newselect.get_variable(vref.name, len(aliases))
   115                 aliases.append(VariableRef(colalias))
   115                 aliases.append(VariableRef(colalias))
   116                 selected.add(vref.name)
   116                 selected.add(vref.name)
   117                 
   117 
   118 # Plans #######################################################################
   118 # Plans #######################################################################
   119 
   119 
   120 class ExecutionPlan(object):
   120 class ExecutionPlan(object):
   121     """the execution model of a rql query, composed of querier steps"""
   121     """the execution model of a rql query, composed of querier steps"""
   122     
   122 
   123     def __init__(self, querier, rqlst, args, session):
   123     def __init__(self, querier, rqlst, args, session):
   124         # original rql syntax tree
   124         # original rql syntax tree
   125         self.rqlst = rqlst
   125         self.rqlst = rqlst
   126         self.args = args or {}
   126         self.args = args or {}
   127         # session executing the query
   127         # session executing the query
    135         # various resource accessors
    135         # various resource accessors
   136         self.querier = querier
   136         self.querier = querier
   137         self.schema = querier.schema
   137         self.schema = querier.schema
   138         self.rqlhelper = querier._rqlhelper
   138         self.rqlhelper = querier._rqlhelper
   139         self.sqlannotate = querier.sqlgen_annotate
   139         self.sqlannotate = querier.sqlgen_annotate
   140         
   140 
   141     def annotate_rqlst(self):
   141     def annotate_rqlst(self):
   142         if not self.rqlst.annotated:
   142         if not self.rqlst.annotated:
   143             self.rqlhelper.annotate(self.rqlst)
   143             self.rqlhelper.annotate(self.rqlst)
   144             
   144 
   145     def add_step(self, step):
   145     def add_step(self, step):
   146         """add a step to the plan"""
   146         """add a step to the plan"""
   147         self.steps.append(step)
   147         self.steps.append(step)
   148 
   148 
   149     def clean(self):
   149     def clean(self):
   150         """remove temporary tables"""
   150         """remove temporary tables"""
   151         self.syssource.clean_temp_data(self.session, self.temp_tables)
   151         self.syssource.clean_temp_data(self.session, self.temp_tables)
   152         
   152 
   153     def sqlexec(self, sql, args=None):
   153     def sqlexec(self, sql, args=None):
   154         return self.syssource.sqlexec(self.session, sql, args)
   154         return self.syssource.sqlexec(self.session, sql, args)
   155             
   155 
   156     def execute(self):
   156     def execute(self):
   157         """execute a plan and return resulting rows"""
   157         """execute a plan and return resulting rows"""
   158         try:
   158         try:
   159             for step in self.steps:
   159             for step in self.steps:
   160                 result = step.execute()
   160                 result = step.execute()
    161             # the last executed step contains the full query result
    161             # the last executed step contains the full query result
   162             return result
   162             return result
   163         finally:
   163         finally:
   164             self.clean()
   164             self.clean()
   165             
   165 
   166     def init_temp_table(self, table, selected, sol):
   166     def init_temp_table(self, table, selected, sol):
   167         """initialize sql schema and variable map for a temporary table which
   167         """initialize sql schema and variable map for a temporary table which
   168         will be used to store result for the given rqlst
   168         will be used to store result for the given rqlst
   169         """
   169         """
   170         try:
   170         try:
   173         except KeyError:
   173         except KeyError:
   174             sqlschema, outputmap = self.syssource.temp_table_def(selected, sol,
   174             sqlschema, outputmap = self.syssource.temp_table_def(selected, sol,
   175                                                                  table)
   175                                                                  table)
   176             self.temp_tables[table] = [outputmap, sqlschema, False]
   176             self.temp_tables[table] = [outputmap, sqlschema, False]
   177         return outputmap
   177         return outputmap
   178         
   178 
   179     def create_temp_table(self, table):
   179     def create_temp_table(self, table):
   180         """create a temporary table to store result for the given rqlst"""
   180         """create a temporary table to store result for the given rqlst"""
   181         if not self.temp_tables[table][-1]:
   181         if not self.temp_tables[table][-1]:
   182             sqlschema = self.temp_tables[table][1]
   182             sqlschema = self.temp_tables[table][1]
   183             self.syssource.create_temp_table(self.session, table, sqlschema)
   183             self.syssource.create_temp_table(self.session, table, sqlschema)
   184             self.temp_tables[table][-1] = True
   184             self.temp_tables[table][-1] = True
   185         
   185 
   186     def preprocess(self, union, security=True):
   186     def preprocess(self, union, security=True):
   187         """insert security when necessary then annotate rql st for sql generation
   187         """insert security when necessary then annotate rql st for sql generation
   188         
   188 
   189         return rqlst to actually execute
   189         return rqlst to actually execute
   190         """
   190         """
   191         #if server.DEBUG:
   191         #if server.DEBUG:
   192         #    print '------- preprocessing', union.as_string('utf8')
   192         #    print '------- preprocessing', union.as_string('utf8')
   193         noinvariant = set()
   193         noinvariant = set()
   277 
   277 
    278         solutions where a variable has a type which the user definitely can't read
    278         solutions where a variable has a type which the user definitely can't read
   279         are removed, else if the user may read it (eg if an rql expression is
   279         are removed, else if the user may read it (eg if an rql expression is
   280         defined for the "read" permission of the related type), the local checks
   280         defined for the "read" permission of the related type), the local checks
   281         dict for the solution is updated
   281         dict for the solution is updated
   282         
   282 
   283         return a dict with entries for each different local check necessary,
   283         return a dict with entries for each different local check necessary,
   284         with associated solutions as value. A local check is defined by a list
   284         with associated solutions as value. A local check is defined by a list
    285         of 2-tuples, with variable name as first item and the necessary rql
    285         of 2-tuples, with variable name as first item and the necessary rql
   286         expression as second item for each variable which has to be checked.
   286         expression as second item for each variable which has to be checked.
   287         So solutions which don't require local checks will be associated to
   287         So solutions which don't require local checks will be associated to
   344                 sol[newvarname] = nvartype
   344                 sol[newvarname] = nvartype
   345         select.clean_solutions(solutions)
   345         select.clean_solutions(solutions)
   346         self.rqlhelper.annotate(rqlst)
   346         self.rqlhelper.annotate(rqlst)
   347         self.preprocess(rqlst, security=False)
   347         self.preprocess(rqlst, security=False)
   348         return rqlst
   348         return rqlst
   349        
   349 
   350 class InsertPlan(ExecutionPlan):
   350 class InsertPlan(ExecutionPlan):
   351     """an execution model specific to the INSERT rql query
   351     """an execution model specific to the INSERT rql query
   352     """
   352     """
   353     
   353 
   354     def __init__(self, querier, rqlst, args, session):
   354     def __init__(self, querier, rqlst, args, session):
   355         ExecutionPlan.__init__(self, querier, rqlst, args, session)
   355         ExecutionPlan.__init__(self, querier, rqlst, args, session)
    356         # save originally selected variable, we may modify this
    356         # save originally selected variable, we may modify this
   357         # dictionary for substitution (query parameters)
   357         # dictionary for substitution (query parameters)
   358         self.selected = rqlst.selection
   358         self.selected = rqlst.selection
   385                 if isinstance(rhs, Constant) and not rhs.uid:
   385                 if isinstance(rhs, Constant) and not rhs.uid:
   386                     # add constant values to entity def
   386                     # add constant values to entity def
   387                     value = rhs.eval(self.args)
   387                     value = rhs.eval(self.args)
   388                     eschema = edef.e_schema
   388                     eschema = edef.e_schema
   389                     attrtype = eschema.subject_relation(rtype).objects(eschema)[0]
   389                     attrtype = eschema.subject_relation(rtype).objects(eschema)[0]
   390                     if attrtype == 'Password' and isinstance(value, unicode): 
   390                     if attrtype == 'Password' and isinstance(value, unicode):
   391                         value = value.encode('UTF8')
   391                         value = value.encode('UTF8')
   392                     edef[rtype] = value
   392                     edef[rtype] = value
   393                 elif to_build.has_key(str(rhs)):
   393                 elif to_build.has_key(str(rhs)):
   394                     # create a relation between two newly created variables
   394                     # create a relation between two newly created variables
   395                     self.add_relation_def((edef, rtype, to_build[rhs.name]))
   395                     self.add_relation_def((edef, rtype, to_build[rhs.name]))
   396                 else:
   396                 else:
   397                     to_select.setdefault(edef, []).append( (rtype, rhs, 0) )
   397                     to_select.setdefault(edef, []).append( (rtype, rhs, 0) )
   398         return to_select
   398         return to_select
   399 
   399 
   400         
   400 
   401     def add_entity_def(self, edef):
   401     def add_entity_def(self, edef):
   402         """add an entity definition to build"""
   402         """add an entity definition to build"""
   403         edef.querier_pending_relations = {}
   403         edef.querier_pending_relations = {}
   404         self.e_defs[-1].append(edef)
   404         self.e_defs[-1].append(edef)
   405         
   405 
   406     def add_relation_def(self, rdef):
   406     def add_relation_def(self, rdef):
    407         """add a relation definition to build"""
    407         """add a relation definition to build"""
   408         self.r_defs.append(rdef)
   408         self.r_defs.append(rdef)
   409         if not isinstance(rdef[0], int):
   409         if not isinstance(rdef[0], int):
   410             self._r_subj_index.setdefault(rdef[0], []).append(rdef)
   410             self._r_subj_index.setdefault(rdef[0], []).append(rdef)
   411         if not isinstance(rdef[2], int):
   411         if not isinstance(rdef[2], int):
   412             self._r_obj_index.setdefault(rdef[2], []).append(rdef)
   412             self._r_obj_index.setdefault(rdef[2], []).append(rdef)
   413         
   413 
   414     def substitute_entity_def(self, edef, edefs):
   414     def substitute_entity_def(self, edef, edefs):
   415         """substitute an incomplete entity definition by a list of complete
   415         """substitute an incomplete entity definition by a list of complete
   416         equivalents
   416         equivalents
   417         
   417 
   418         e.g. on queries such as ::
   418         e.g. on queries such as ::
   419           INSERT Personne X, Societe Y: X nom N, Y nom 'toto', X travaille Y
   419           INSERT Personne X, Societe Y: X nom N, Y nom 'toto', X travaille Y
   420           WHERE U login 'admin', U login N
   420           WHERE U login 'admin', U login N
   421 
   421 
   422         X will be inserted as many times as U exists, and so the X travaille Y
   422         X will be inserted as many times as U exists, and so the X travaille Y
   453                 result = []
   453                 result = []
   454                 for exp_rdef in expanded:
   454                 for exp_rdef in expanded:
   455                     for edef in edefs:
   455                     for edef in edefs:
   456                         result.append( (exp_rdef[0], exp_rdef[1], edef) )
   456                         result.append( (exp_rdef[0], exp_rdef[1], edef) )
   457                 self._expanded_r_defs[rdef] = result
   457                 self._expanded_r_defs[rdef] = result
   458         
   458 
   459     def _expanded(self, rdef):
   459     def _expanded(self, rdef):
   460         """return expanded value for the given relation definition"""
   460         """return expanded value for the given relation definition"""
   461         try:
   461         try:
   462             return self._expanded_r_defs[rdef]
   462             return self._expanded_r_defs[rdef]
   463         except KeyError:
   463         except KeyError:
   464             self.r_defs.remove(rdef)
   464             self.r_defs.remove(rdef)
   465             return [rdef]
   465             return [rdef]
   466         
   466 
   467     def relation_defs(self):
   467     def relation_defs(self):
    468         """yield each relation definition to insert"""
    468         """yield each relation definition to insert"""
   469         for rdefs in self._expanded_r_defs.values():
   469         for rdefs in self._expanded_r_defs.values():
   470             for rdef in rdefs:
   470             for rdef in rdefs:
   471                 yield rdef
   471                 yield rdef
   472         for rdef in self.r_defs:
   472         for rdef in self.r_defs:
   473             yield rdef
   473             yield rdef
   474             
   474 
   475     def insert_entity_defs(self):
   475     def insert_entity_defs(self):
   476         """return eids of inserted entities in a suitable form for the resulting
   476         """return eids of inserted entities in a suitable form for the resulting
   477         result set, e.g.:
   477         result set, e.g.:
   478         
   478 
   479         e.g. on queries such as ::
   479         e.g. on queries such as ::
   480           INSERT Personne X, Societe Y: X nom N, Y nom 'toto', X travaille Y
   480           INSERT Personne X, Societe Y: X nom N, Y nom 'toto', X travaille Y
   481           WHERE U login 'admin', U login N
   481           WHERE U login 'admin', U login N
   482 
   482 
    483         if there are two entities matching U, the result set will look like
    483         if there are two entities matching U, the result set will look like
   488         results = []
   488         results = []
   489         for row in self.e_defs:
   489         for row in self.e_defs:
   490             results.append([repo.glob_add_entity(session, edef)
   490             results.append([repo.glob_add_entity(session, edef)
   491                             for edef in row])
   491                             for edef in row])
   492         return results
   492         return results
   493         
   493 
   494     def insert_relation_defs(self):
   494     def insert_relation_defs(self):
   495         session = self.session
   495         session = self.session
   496         repo = session.repo
   496         repo = session.repo
   497         for subj, rtype, obj in self.relation_defs():
   497         for subj, rtype, obj in self.relation_defs():
   498             # if a string is given into args instead of an int, we get it here
   498             # if a string is given into args instead of an int, we get it here
   512                 repo.glob_add_relation(session, subj, rtype, obj)
   512                 repo.glob_add_relation(session, subj, rtype, obj)
   513 
   513 
   514 
   514 
   515 class QuerierHelper(object):
   515 class QuerierHelper(object):
   516     """helper class to execute rql queries, putting all things together"""
   516     """helper class to execute rql queries, putting all things together"""
   517     
   517 
   518     def __init__(self, repo, schema):
   518     def __init__(self, repo, schema):
   519         # system info helper
   519         # system info helper
   520         self._repo = repo
   520         self._repo = repo
   521         # application schema
   521         # application schema
   522         self.set_schema(schema)
   522         self.set_schema(schema)
   523         
   523 
   524     def set_schema(self, schema):
   524     def set_schema(self, schema):
   525         self.schema = schema
   525         self.schema = schema
   526         # rql parsing / analysing helper
   526         # rql parsing / analysing helper
   527         self._rqlhelper = RQLHelper(schema, special_relations={'eid': 'uid',
   527         self._rqlhelper = RQLHelper(schema, special_relations={'eid': 'uid',
   528                                                                'has_text': 'fti'})        
   528                                                                'has_text': 'fti'})
   529         self._rql_cache = Cache(self._repo.config['rql-cache-size'])
   529         self._rql_cache = Cache(self._repo.config['rql-cache-size'])
   530         self.cache_hit, self.cache_miss = 0, 0
   530         self.cache_hit, self.cache_miss = 0, 0
   531         # rql planner
   531         # rql planner
   532         # note: don't use repo.sources, may not be built yet, and also "admin"
   532         # note: don't use repo.sources, may not be built yet, and also "admin"
   533         #       isn't an actual source
   533         #       isn't an actual source
   534         if len([uri for uri in self._repo.config.sources() if uri != 'admin']) < 2:
   534         if len([uri for uri in self._repo.config.sources() if uri != 'admin']) < 2:
   535             from cubicweb.server.ssplanner import SSPlanner
   535             from cubicweb.server.ssplanner import SSPlanner
   536             self._planner = SSPlanner(schema, self._rqlhelper)
   536             self._planner = SSPlanner(schema, self._rqlhelper)
   537         else:
   537         else:
   538             from cubicweb.server.msplanner import MSPlanner            
   538             from cubicweb.server.msplanner import MSPlanner
   539             self._planner = MSPlanner(schema, self._rqlhelper)
   539             self._planner = MSPlanner(schema, self._rqlhelper)
   540         # sql generation annotator
   540         # sql generation annotator
   541         self.sqlgen_annotate = SQLGenAnnotator(schema).annotate
   541         self.sqlgen_annotate = SQLGenAnnotator(schema).annotate
   542         
   542 
   543     def parse(self, rql, annotate=False):
   543     def parse(self, rql, annotate=False):
   544         """return a rql syntax tree for the given rql"""
   544         """return a rql syntax tree for the given rql"""
   545         try:
   545         try:
   546             return self._rqlhelper.parse(unicode(rql), annotate=annotate)
   546             return self._rqlhelper.parse(unicode(rql), annotate=annotate)
   547         except UnicodeError:
   547         except UnicodeError:
   557     def plan_factory(self, rqlst, args, session):
   557     def plan_factory(self, rqlst, args, session):
    558         """create an execution plan for the given RQL query (an InsertPlan for INSERT queries)"""
    558         """create an execution plan for the given RQL query (an InsertPlan for INSERT queries)"""
   559         if rqlst.TYPE == 'insert':
   559         if rqlst.TYPE == 'insert':
   560             return InsertPlan(self, rqlst, args, session)
   560             return InsertPlan(self, rqlst, args, session)
   561         return ExecutionPlan(self, rqlst, args, session)
   561         return ExecutionPlan(self, rqlst, args, session)
   562         
   562 
   563     def execute(self, session, rql, args=None, eid_key=None, build_descr=True):
   563     def execute(self, session, rql, args=None, eid_key=None, build_descr=True):
   564         """execute a rql query, return resulting rows and their description in
   564         """execute a rql query, return resulting rows and their description in
   565         a `ResultSet` object
   565         a `ResultSet` object
   566 
   566 
    567         * `rql` should be a unicode string or a plain ascii string
    567         * `rql` should be a unicode string or a plain ascii string
    576           and resolve some ambiguity in the possible solutions inferred for each
    576           and resolve some ambiguity in the possible solutions inferred for each
   577           variable in the query.
   577           variable in the query.
   578 
   578 
    579         on INSERT queries, there will be one row with the eid of each inserted
    579         on INSERT queries, there will be one row with the eid of each inserted
   580         entity
   580         entity
   581         
   581 
   582         result for DELETE and SET queries is undefined yet
   582         result for DELETE and SET queries is undefined yet
   583 
   583 
   584         to maximize the rql parsing/analyzing cache performance, you should
   584         to maximize the rql parsing/analyzing cache performance, you should
   585         always use substitute arguments in queries (eg avoid query such as
   585         always use substitute arguments in queries (eg avoid query such as
   586         'Any X WHERE X eid 123'!)
   586         'Any X WHERE X eid 123'!)