server/querier.py
branchstable
changeset 9167 c05652b108ce
parent 8867 6ad000b91347
child 9267 24d9b86dfa54
equal deleted inserted replaced
9166:e47e192ea0d9 9167:c05652b108ce
     1 # copyright 2003-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
     1 # copyright 2003-2013 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
     2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
     2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
     3 #
     3 #
     4 # This file is part of CubicWeb.
     4 # This file is part of CubicWeb.
     5 #
     5 #
     6 # CubicWeb is free software: you can redistribute it and/or modify it under the
     6 # CubicWeb is free software: you can redistribute it and/or modify it under the
    22 
    22 
    23 from itertools import repeat
    23 from itertools import repeat
    24 
    24 
    25 from logilab.common.compat import any
    25 from logilab.common.compat import any
    26 from rql import RQLSyntaxError, CoercionError
    26 from rql import RQLSyntaxError, CoercionError
    27 from rql.stmts import Union, Select
    27 from rql.stmts import Union
    28 from rql.nodes import ETYPE_PYOBJ_MAP, etype_from_pyobj
    28 from rql.nodes import ETYPE_PYOBJ_MAP, etype_from_pyobj, Relation, Exists, Not
    29 from rql.nodes import (Relation, VariableRef, Constant, SubQuery, Function,
       
    30                        Exists, Not)
       
    31 from yams import BASE_TYPES
    29 from yams import BASE_TYPES
    32 
    30 
    33 from cubicweb import ValidationError, Unauthorized, QueryError, UnknownEid
    31 from cubicweb import ValidationError, Unauthorized, UnknownEid
    34 from cubicweb import Binary, server
    32 from cubicweb import Binary, server
    35 from cubicweb.rset import ResultSet
    33 from cubicweb.rset import ResultSet
    36 
    34 
    37 from cubicweb.utils import QueryCache, RepeatList
    35 from cubicweb.utils import QueryCache, RepeatList
    38 from cubicweb.server.utils import cleanup_solutions
       
    39 from cubicweb.server.rqlannotation import SQLGenAnnotator, set_qdata
    36 from cubicweb.server.rqlannotation import SQLGenAnnotator, set_qdata
    40 from cubicweb.server.ssplanner import READ_ONLY_RTYPES, add_types_restriction
    37 from cubicweb.server.ssplanner import READ_ONLY_RTYPES, add_types_restriction
    41 from cubicweb.server.edition import EditedEntity
    38 from cubicweb.server.edition import EditedEntity
    42 
    39 
    43 
    40 
    75         return solution[term.name]
    72         return solution[term.name]
    76     except AttributeError:
    73     except AttributeError:
    77         return session.describe(term.eval(args))[0]
    74         return session.describe(term.eval(args))[0]
    78 
    75 
    79 def check_read_access(session, rqlst, solution, args):
    76 def check_read_access(session, rqlst, solution, args):
    80     """check that the given user has credentials to access data read the
    77     """Check that the given user has credentials to access data read by the
    81     query
    78     query and return a dict defining necessary "local checks" (i.e. rql
    82 
    79     expression in read permission defined in the schema) where no group grants
    83     return a dict defining necessary local checks (due to use of rql expression
    80     him the permission.
    84     in the schema), keys are variable names and values associated rql expression
    81 
    85     for the associated variable with the given solution
    82     Returned dictionary's keys are variable names and values the rql expressions
       
    83     for this variable (with the given solution).
    86     """
    84     """
    87     # use `term_etype` since we've to deal with rewritten constants here,
    85     # use `term_etype` since we've to deal with rewritten constants here,
    88     # when used as an external source by another repository.
    86     # when used as an external source by another repository.
    89     # XXX what about local read security w/ those rewritten constants...
    87     # XXX what about local read security w/ those rewritten constants...
    90     schema = session.repo.schema
    88     schema = session.repo.schema
   128                               or isinstance(r.parent, Not)))])
   126                               or isinstance(r.parent, Not)))])
   129                 != len(varinfo['relations'])):
   127                 != len(varinfo['relations'])):
   130                 localchecks[varname] = erqlexprs
   128                 localchecks[varname] = erqlexprs
   131     return localchecks
   129     return localchecks
   132 
   130 
   133 def add_noinvariant(noinvariant, restricted, select, nbtrees):
       
   134     # a variable can actually be invariant if it has not been restricted for
       
   135     # security reason or if security assertion hasn't modified the possible
       
   136     # solutions for the query
       
   137     if nbtrees != 1:
       
   138         for vname in restricted:
       
   139             try:
       
   140                 noinvariant.add(select.defined_vars[vname])
       
   141             except KeyError:
       
   142                 # this is an alias
       
   143                 continue
       
   144     else:
       
   145         for vname in restricted:
       
   146             try:
       
   147                 var = select.defined_vars[vname]
       
   148             except KeyError:
       
   149                 # this is an alias
       
   150                 continue
       
   151             if len(var.stinfo['possibletypes']) != 1:
       
   152                 noinvariant.add(var)
       
   153 
       
   154 def _expand_selection(terms, selected, aliases, select, newselect):
       
   155     for term in terms:
       
   156         for vref in term.iget_nodes(VariableRef):
       
   157             if not vref.name in selected:
       
   158                 select.append_selected(vref)
       
   159                 colalias = newselect.get_variable(vref.name, len(aliases))
       
   160                 aliases.append(VariableRef(colalias))
       
   161                 selected.add(vref.name)
       
   162 
   131 
   163 # Plans #######################################################################
   132 # Plans #######################################################################
   164 
   133 
   165 class ExecutionPlan(object):
   134 class ExecutionPlan(object):
   166     """the execution model of a rql query, composed of querier steps"""
   135     """the execution model of a rql query, composed of querier steps"""
   256                 union.has_text_query = cachedunion.has_text_query
   225                 union.has_text_query = cachedunion.has_text_query
   257                 args.update(self.args)
   226                 args.update(self.args)
   258                 self.args = args
   227                 self.args = args
   259                 cached = True
   228                 cached = True
   260             else:
   229             else:
   261                 noinvariant = set()
       
   262                 with self.session.security_enabled(read=False):
   230                 with self.session.security_enabled(read=False):
   263                     self._insert_security(union, noinvariant)
   231                     noinvariant = self._insert_security(union)
   264                 if key is not None:
   232                 if key is not None:
   265                     self.session.transaction_data[key] = (union, self.args)
   233                     self.session.transaction_data[key] = (union, self.args)
   266         else:
   234         else:
   267             noinvariant = ()
   235             noinvariant = ()
   268         if cached is None:
   236         if cached is None:
   270             self.sqlannotate(union)
   238             self.sqlannotate(union)
   271             set_qdata(self.schema.rschema, union, noinvariant)
   239             set_qdata(self.schema.rschema, union, noinvariant)
   272         if union.has_text_query:
   240         if union.has_text_query:
   273             self.cache_key = None
   241             self.cache_key = None
   274 
   242 
   275     def _insert_security(self, union, noinvariant):
   243     def _insert_security(self, union):
       
   244         noinvariant = set()
   276         for select in union.children[:]:
   245         for select in union.children[:]:
   277             for subquery in select.with_:
   246             for subquery in select.with_:
   278                 self._insert_security(subquery.query, noinvariant)
   247                 self._insert_security(subquery.query)
   279             localchecks, restricted = self._check_permissions(select)
   248             localchecks, restricted = self._check_permissions(select)
   280             if any(localchecks):
   249             if any(localchecks):
   281                 rewrite = self.session.rql_rewriter.rewrite
   250                 self.session.rql_rewriter.insert_local_checks(
   282                 nbtrees = len(localchecks)
   251                     select, self.args, localchecks, restricted, noinvariant)
   283                 myunion = union
   252         return noinvariant
   284                 # transform in subquery when len(localchecks)>1 and groups
       
   285                 if nbtrees > 1 and (select.orderby or select.groupby or
       
   286                                     select.having or select.has_aggregat or
       
   287                                     select.distinct or
       
   288                                     select.limit or select.offset):
       
   289                     newselect = Select()
       
   290                     # only select variables in subqueries
       
   291                     origselection = select.selection
       
   292                     select.select_only_variables()
       
   293                     select.has_aggregat = False
       
   294                     # create subquery first so correct nodes are used on copy
       
   295                     # (eg ColumnAlias instead of Variable)
       
   296                     aliases = [VariableRef(newselect.get_variable(vref.name, i))
       
   297                                for i, vref in enumerate(select.selection)]
       
   298                     selected = set(vref.name for vref in aliases)
       
   299                     # now copy original selection and groups
       
   300                     for term in origselection:
       
   301                         newselect.append_selected(term.copy(newselect))
       
   302                     if select.orderby:
       
   303                         sortterms = []
       
   304                         for sortterm in select.orderby:
       
   305                             sortterms.append(sortterm.copy(newselect))
       
   306                             for fnode in sortterm.get_nodes(Function):
       
   307                                 if fnode.name == 'FTIRANK':
       
   308                                     # we've to fetch the has_text relation as well
       
   309                                     var = fnode.children[0].variable
       
   310                                     rel = iter(var.stinfo['ftirels']).next()
       
   311                                     assert not rel.ored(), 'unsupported'
       
   312                                     newselect.add_restriction(rel.copy(newselect))
       
   313                                     # remove relation from the orig select and
       
   314                                     # cleanup variable stinfo
       
   315                                     rel.parent.remove(rel)
       
   316                                     var.stinfo['ftirels'].remove(rel)
       
   317                                     var.stinfo['relations'].remove(rel)
       
   318                                     # XXX not properly re-annotated after security insertion?
       
   319                                     newvar = newselect.get_variable(var.name)
       
   320                                     newvar.stinfo.setdefault('ftirels', set()).add(rel)
       
   321                                     newvar.stinfo.setdefault('relations', set()).add(rel)
       
   322                         newselect.set_orderby(sortterms)
       
   323                         _expand_selection(select.orderby, selected, aliases, select, newselect)
       
   324                         select.orderby = () # XXX dereference?
       
   325                     if select.groupby:
       
   326                         newselect.set_groupby([g.copy(newselect) for g in select.groupby])
       
   327                         _expand_selection(select.groupby, selected, aliases, select, newselect)
       
   328                         select.groupby = () # XXX dereference?
       
   329                     if select.having:
       
   330                         newselect.set_having([g.copy(newselect) for g in select.having])
       
   331                         _expand_selection(select.having, selected, aliases, select, newselect)
       
   332                         select.having = () # XXX dereference?
       
   333                     if select.limit:
       
   334                         newselect.limit = select.limit
       
   335                         select.limit = None
       
   336                     if select.offset:
       
   337                         newselect.offset = select.offset
       
   338                         select.offset = 0
       
   339                     myunion = Union()
       
   340                     newselect.set_with([SubQuery(aliases, myunion)], check=False)
       
   341                     newselect.distinct = select.distinct
       
   342                     solutions = [sol.copy() for sol in select.solutions]
       
   343                     cleanup_solutions(newselect, solutions)
       
   344                     newselect.set_possible_types(solutions)
       
   345                     # if some solutions don't need rewriting, insert original
       
   346                     # select as first union subquery
       
   347                     if () in localchecks:
       
   348                         myunion.append(select)
       
   349                     # we're done, replace original select by the new select with
       
   350                     # subqueries (more added in the loop below)
       
   351                     union.replace(select, newselect)
       
   352                 elif not () in localchecks:
       
   353                     union.remove(select)
       
   354                 for lcheckdef, lchecksolutions in localchecks.iteritems():
       
   355                     if not lcheckdef:
       
   356                         continue
       
   357                     myrqlst = select.copy(solutions=lchecksolutions)
       
   358                     myunion.append(myrqlst)
       
   359                     # in-place rewrite + annotation / simplification
       
   360                     lcheckdef = [({var: 'X'}, rqlexprs) for var, rqlexprs in lcheckdef]
       
   361                     rewrite(myrqlst, lcheckdef, lchecksolutions, self.args)
       
   362                     add_noinvariant(noinvariant, restricted, myrqlst, nbtrees)
       
   363                 if () in localchecks:
       
   364                     select.set_possible_types(localchecks[()])
       
   365                     add_types_restriction(self.schema, select)
       
   366                     add_noinvariant(noinvariant, restricted, select, nbtrees)
       
   367                 self.rqlhelper.annotate(union)
       
   368 
   253 
   369     def _check_permissions(self, rqlst):
   254     def _check_permissions(self, rqlst):
   370         """return a dict defining "local checks", e.g. RQLExpression defined in
   255         """Return a dict defining "local checks", i.e. RQLExpression defined in
   371         the schema that should be inserted in the original query
   256         the schema that should be inserted in the original query, together with
   372 
   257         a set of variable names which requires some security to be inserted.
   373         solutions where a variable has a type which the user can't definitely read
   258 
   374         are removed, else if the user may read it (eg if an rql expression is
   259         Solutions where a variable has a type which the user can't definitely
   375         defined for the "read" permission of the related type), the local checks
   260         read are removed, else if the user *may* read it (i.e. if an rql
   376         dict for the solution is updated
   261         expression is defined for the "read" permission of the related type),
   377 
   262         the local checks dict is updated.
   378         return a dict with entries for each different local check necessary,
   263 
   379         with associated solutions as value. A local check is defined by a list
   264         The local checks dict has entries for each different local check
   380         of 2-tuples, with variable name as first item and the necessary rql
   265         necessary, with associated solutions as value, a local check being
   381         expression as second item for each variable which has to be checked.
   266         defined by a list of 2-tuples (variable name, rql expressions) for each
   382         So solutions which don't require local checks will be associated to
   267         variable which has to be checked. Solutions which don't require local
   383         the empty tuple key.
   268         checks will be associated to the empty tuple key.
   384 
   269 
   385         note: rqlst should not have been simplified at this point
   270         Note rqlst should not have been simplified at this point.
   386         """
   271         """
   387         session = self.session
   272         session = self.session
   388         msgs = []
   273         msgs = []
   389         neweids = session.transaction_data.get('neweids', ())
   274         # dict(varname: eid), allowing to check rql expression for variables
       
   275         # which have a known eid
   390         varkwargs = {}
   276         varkwargs = {}
   391         if not session.transaction_data.get('security-rqlst-cache'):
   277         if not session.transaction_data.get('security-rqlst-cache'):
   392             for var in rqlst.defined_vars.itervalues():
   278             for var in rqlst.defined_vars.itervalues():
   393                 if var.stinfo['constnode'] is not None:
   279                 if var.stinfo['constnode'] is not None:
   394                     eid = var.stinfo['constnode'].eval(self.args)
   280                     eid = var.stinfo['constnode'].eval(self.args)
   412                 for varname, eid in varkwargs.iteritems():
   298                 for varname, eid in varkwargs.iteritems():
   413                     try:
   299                     try:
   414                         rqlexprs = localcheck.pop(varname)
   300                         rqlexprs = localcheck.pop(varname)
   415                     except KeyError:
   301                     except KeyError:
   416                         continue
   302                         continue
   417                     if eid in neweids:
   303                     # if entity has been added in the current transaction, the
       
   304                     # user can read it whatever rql expressions are associated
       
   305                     # to its type
       
   306                     if session.added_in_transaction(eid):
   418                         continue
   307                         continue
   419                     for rqlexpr in rqlexprs:
   308                     for rqlexpr in rqlexprs:
   420                         if rqlexpr.check(session, eid):
   309                         if rqlexpr.check(session, eid):
   421                             break
   310                             break
   422                     else:
   311                     else:
   423                         raise Unauthorized('No read acces on %r with eid %i.' % (var, eid))
   312                         raise Unauthorized('No read acces on %r with eid %i.' % (var, eid))
       
   313                 # mark variables protected by an rql expression
   424                 restricted_vars.update(localcheck)
   314                 restricted_vars.update(localcheck)
   425                 localchecks.setdefault(tuple(localcheck.iteritems()), []).append(solution)
   315                 # turn local check into a dict key
       
   316                 localcheck = tuple(sorted(localcheck.iteritems()))
       
   317                 localchecks.setdefault(localcheck, []).append(solution)
   426         # raise Unauthorized exception if the user can't access any solution
   318         # raise Unauthorized exception if the user can't access any solution
   427         if not newsolutions:
   319         if not newsolutions:
   428             raise Unauthorized('\n'.join(msgs))
   320             raise Unauthorized('\n'.join(msgs))
       
   321         # if there is some message, solutions have been modified and must be
       
   322         # reconsidered by the syntax tree
   429         if msgs:
   323         if msgs:
   430             # (else solutions have not been modified)
       
   431             rqlst.set_possible_types(newsolutions)
   324             rqlst.set_possible_types(newsolutions)
   432         return localchecks, restricted_vars
   325         return localchecks, restricted_vars
   433 
   326 
   434     def finalize(self, select, solutions, insertedvars):
   327     def finalize(self, select, solutions, insertedvars):
   435         rqlst = Union()
   328         rqlst = Union()
   726             rqlst = rqlst.copy()
   619             rqlst = rqlst.copy()
   727             self._annotate(rqlst)
   620             self._annotate(rqlst)
   728             if args:
   621             if args:
   729                 # different SQL generated when some argument is None or not (IS
   622                 # different SQL generated when some argument is None or not (IS
   730                 # NULL). This should be considered when computing sql cache key
   623                 # NULL). This should be considered when computing sql cache key
   731                 cachekey += tuple(sorted([k for k,v in args.iteritems()
   624                 cachekey += tuple(sorted([k for k, v in args.iteritems()
   732                                           if v is None]))
   625                                           if v is None]))
   733         # make an execution plan
   626         # make an execution plan
   734         plan = self.plan_factory(rqlst, args, session)
   627         plan = self.plan_factory(rqlst, args, session)
   735         plan.cache_key = cachekey
   628         plan.cache_key = cachekey
   736         self._planner.build_plan(plan)
   629         self._planner.build_plan(plan)