cubicweb/server/rqlannotation.py
changeset 11057 0b59724cb3f2
parent 10669 155c29e0ed1c
child 11703 670aa9bf0b6c
equal deleted inserted replaced
11052:058bb3dc685f 11057:0b59724cb3f2
       
     1 # copyright 2003-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
       
     2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
       
     3 #
       
     4 # This file is part of CubicWeb.
       
     5 #
       
     6 # CubicWeb is free software: you can redistribute it and/or modify it under the
       
     7 # terms of the GNU Lesser General Public License as published by the Free
       
     8 # Software Foundation, either version 2.1 of the License, or (at your option)
       
     9 # any later version.
       
    10 #
       
    11 # CubicWeb is distributed in the hope that it will be useful, but WITHOUT
       
    12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
       
    13 # FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
       
    14 # details.
       
    15 #
       
    16 # You should have received a copy of the GNU Lesser General Public License along
       
    17 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
       
    18 """Functions to add additional annotations on a rql syntax tree to ease later
       
    19 code generation.
       
    20 """
       
    21 from __future__ import print_function
       
    22 
       
    23 __docformat__ = "restructuredtext en"
       
    24 
       
    25 from rql import BadRQLQuery
       
    26 from rql.nodes import Relation, VariableRef, Constant, Variable, Or, Exists
       
    27 from rql.utils import common_parent
       
    28 
       
    29 def _annotate_select(annotator, rqlst):
       
    30     has_text_query = False
       
    31     for subquery in rqlst.with_:
       
    32         if annotator._annotate_union(subquery.query):
       
    33             has_text_query = True
       
    34     #if server.DEBUG:
       
    35     #    print '-------- sql annotate', repr(rqlst)
       
    36     getrschema = annotator.schema.rschema
       
    37     for var in rqlst.defined_vars.values():
       
    38         stinfo = var.stinfo
       
    39         if stinfo.get('ftirels'):
       
    40             has_text_query = True
       
    41         if stinfo['attrvar']:
       
    42             stinfo['invariant'] = False
       
    43             stinfo['principal'] = _select_main_var(stinfo['rhsrelations'])
       
    44             continue
       
    45         if not stinfo['relations'] and stinfo['typerel'] is None:
       
    46             # Any X, Any MAX(X)...
       
    47             # those particular queries should be executed using the system
       
    48             # entities table unless there is some type restriction
       
    49             stinfo['invariant'] = True
       
    50             stinfo['principal'] = None
       
    51             continue
       
    52         if any(rel for rel in stinfo['relations'] if rel.r_type == 'eid' and rel.operator() != '=') and \
       
    53                not any(r for r in var.stinfo['relations'] - var.stinfo['rhsrelations']
       
    54                        if r.r_type != 'eid' and (getrschema(r.r_type).inlined or getrschema(r.r_type).final)):
       
    55             # Any X WHERE X eid > 2
       
    56             # those particular queries should be executed using the system entities table
       
    57             stinfo['invariant'] = True
       
    58             stinfo['principal'] = None
       
    59             continue
       
    60         if stinfo['selected'] and var.valuable_references() == 1+bool(stinfo['constnode']):
       
    61             # "Any X", "Any X, Y WHERE X attr Y"
       
    62             stinfo['invariant'] = False
       
    63             continue
       
    64         joins = set()
       
    65         invariant = False
       
    66         for ref in var.references():
       
    67             rel = ref.relation()
       
    68             if rel is None or rel.is_types_restriction():
       
    69                 continue
       
    70             lhs, rhs = rel.get_parts()
       
    71             onlhs = ref is lhs
       
    72             role = 'subject' if onlhs else 'object'
       
    73             if rel.r_type == 'eid':
       
    74                 if not (onlhs and len(stinfo['relations']) > 1):
       
    75                     break
       
    76                 if not stinfo['constnode']:
       
    77                     joins.add( (rel, role) )
       
    78                 continue
       
    79             elif rel.r_type == 'identity':
       
    80                 # identity can't be used as principal, so check other relation are used
       
    81                 # XXX explain rhs.operator == '='
       
    82                 if rhs.operator != '=' or len(stinfo['relations']) <= 1: #(stinfo['constnode'] and rhs.operator == '='):
       
    83                     break
       
    84                 joins.add( (rel, role) )
       
    85                 continue
       
    86             rschema = getrschema(rel.r_type)
       
    87             if rel.optional:
       
    88                 if rel in stinfo.get('optrelations', ()):
       
    89                     # optional variable can't be invariant if this is the lhs
       
    90                     # variable of an inlined relation
       
    91                     if not rel in stinfo['rhsrelations'] and rschema.inlined:
       
    92                         break
       
    93                 # variable used as main variable of an optional relation can't
       
    94                 # be invariant, unless we can use some other relation as
       
    95                 # reference for the outer join
       
    96                 elif not stinfo['constnode']:
       
    97                     break
       
    98                 elif len(stinfo['relations']) == 2:
       
    99                     if onlhs:
       
   100                         ostinfo = rhs.children[0].variable.stinfo
       
   101                     else:
       
   102                         ostinfo = lhs.variable.stinfo
       
   103                     if not (ostinfo.get('optcomparisons') or
       
   104                             any(orel for orel in ostinfo['relations']
       
   105                                 if orel.optional and orel is not rel)):
       
   106                         break
       
   107             if rschema.final or (onlhs and rschema.inlined):
       
   108                 if rschema.type != 'has_text':
       
   109                     # need join anyway if the variable appears in a final or
       
   110                     # inlined relation
       
   111                     break
       
   112                 joins.add( (rel, role) )
       
   113                 continue
       
   114             if not stinfo['constnode']:
       
   115                 if rschema.inlined and rel.neged(strict=True):
       
   116                     # if relation is inlined, can't be invariant if that
       
   117                     # variable is used anywhere else.
       
   118                     # see 'Any P WHERE NOT N ecrit_par P, N eid 512':
       
   119                     # sql for 'NOT N ecrit_par P' is 'N.ecrit_par is NULL' so P
       
   120                     # can use N.ecrit_par as principal
       
   121                     if (stinfo['selected'] or len(stinfo['relations']) > 1):
       
   122                         break
       
   123             joins.add( (rel, role) )
       
   124         else:
       
   125             # if there is at least one ambigous relation and no other to
       
   126             # restrict types, can't be invariant since we need to filter out
       
   127             # other types
       
   128             if not annotator.is_ambiguous(var):
       
   129                 invariant = True
       
   130         stinfo['invariant'] = invariant
       
   131         if invariant and joins:
       
   132             # remember rqlst/solutions analyze information
       
   133             # we have to select a kindof "main" relation which will "extrajoins"
       
   134             # the other
       
   135             # priority should be given to relation which are not in inner queries
       
   136             # (eg exists)
       
   137             try:
       
   138                 stinfo['principal'] = principal = _select_principal(var.scope, joins)
       
   139                 if getrschema(principal.r_type).inlined:
       
   140                     # the scope of the lhs variable must be equal or outer to the
       
   141                     # rhs variable's scope (since it's retrieved from lhs's table)
       
   142                     sstinfo = principal.children[0].variable.stinfo
       
   143                     sstinfo['scope'] = common_parent(sstinfo['scope'], stinfo['scope']).scope
       
   144             except CantSelectPrincipal:
       
   145                 stinfo['invariant'] = False
       
   146     # see unittest_rqlannotation. test_has_text_security_cache_bug
       
   147     # XXX probably more to do, but yet that work without more...
       
   148     for col_alias in rqlst.aliases.values():
       
   149         if col_alias.stinfo.get('ftirels'):
       
   150             has_text_query = True
       
   151     return has_text_query
       
   152 
       
   153 
       
   154 
       
   155 class CantSelectPrincipal(Exception):
       
   156     """raised when no 'principal' variable can be found"""
       
   157 
       
   158 def _select_principal(scope, relations, _sort=lambda x:x):
       
   159     """given a list of rqlst relations, select one which will be used to
       
   160     represent an invariant variable (e.g. using on extremity of the relation
       
   161     instead of the variable's type table
       
   162     """
       
   163     # _sort argument is there for test
       
   164     diffscope_rels = {}
       
   165     ored_rels = set()
       
   166     diffscope_rels = set()
       
   167     for rel, role in _sort(relations):
       
   168         # note: only eid and has_text among all final relations may be there
       
   169         if rel.r_type in ('eid', 'identity'):
       
   170             continue
       
   171         if rel.optional is not None and len(relations) > 1:
       
   172             if role == 'subject' and rel.optional == 'right':
       
   173                 continue
       
   174             if role == 'object' and rel.optional == 'left':
       
   175                 continue
       
   176         if rel.ored(traverse_scope=True):
       
   177             ored_rels.add(rel)
       
   178         elif rel.scope is scope:
       
   179             return rel
       
   180         elif not rel.neged(traverse_scope=True):
       
   181             diffscope_rels.add(rel)
       
   182     if len(ored_rels) > 1:
       
   183         ored_rels_copy = tuple(ored_rels)
       
   184         for rel1 in ored_rels_copy:
       
   185             for rel2 in ored_rels_copy:
       
   186                 if rel1 is rel2:
       
   187                     continue
       
   188                 if isinstance(common_parent(rel1, rel2), Or):
       
   189                     ored_rels.discard(rel1)
       
   190                     ored_rels.discard(rel2)
       
   191     for rel in _sort(ored_rels):
       
   192         if rel.scope is scope:
       
   193             return rel
       
   194         diffscope_rels.add(rel)
       
   195     # if DISTINCT query, can use variable from a different scope as principal
       
   196     # since introduced duplicates will be removed
       
   197     if scope.stmt.distinct and diffscope_rels:
       
   198         return next(iter(_sort(diffscope_rels)))
       
   199     # XXX could use a relation from a different scope if it can't generate
       
   200     # duplicates, so we should have to check cardinality
       
   201     raise CantSelectPrincipal()
       
   202 
       
   203 def _select_main_var(relations):
       
   204     """given a list of rqlst relations, select one which will be used as main
       
   205     relation for the rhs variable
       
   206     """
       
   207     principal = None
       
   208     others = []
       
   209     # sort for test predictability
       
   210     for rel in sorted(relations, key=lambda x: (x.children[0].name, x.r_type)):
       
   211         # only equality relation with a variable as rhs may be principal
       
   212         if rel.operator() not in ('=', 'IS') \
       
   213                or not isinstance(rel.children[1].children[0], VariableRef) or rel.neged(strict=True):
       
   214             continue
       
   215         if rel.optional:
       
   216             others.append(rel)
       
   217             continue
       
   218         if rel.scope is rel.stmt:
       
   219             return rel
       
   220         principal = rel
       
   221     if principal is None:
       
   222         if others:
       
   223             return others[0]
       
   224         raise BadRQLQuery('unable to find principal in %s' % ', '.join(
       
   225             r.as_string() for r in relations))
       
   226     return principal
       
   227 
       
   228 
       
   229 def set_qdata(getrschema, union, noinvariant):
       
   230     """recursive function to set querier data on variables in the syntax tree
       
   231     """
       
   232     for select in union.children:
       
   233         for subquery in select.with_:
       
   234             set_qdata(getrschema, subquery.query, noinvariant)
       
   235         for var in select.defined_vars.values():
       
   236             if var.stinfo['invariant']:
       
   237                 if var in noinvariant and not var.stinfo['principal'].r_type == 'has_text':
       
   238                     var._q_invariant = False
       
   239                 else:
       
   240                     var._q_invariant = True
       
   241             else:
       
   242                 var._q_invariant = False
       
   243 
       
   244 
       
   245 class SQLGenAnnotator(object):
       
   246     def __init__(self, schema):
       
   247         self.schema = schema
       
   248         self.nfdomain = frozenset(eschema.type for eschema in schema.entities()
       
   249                                   if not eschema.final)
       
   250 
       
   251     def annotate(self, rqlst):
       
   252         """add information to the rql syntax tree to help sources to do their
       
   253         job (read sql generation)
       
   254 
       
   255         a variable is tagged as invariant if:
       
   256         * it's a non final variable
       
   257         * it's not used as lhs in any final or inlined relation
       
   258         * there is no type restriction on this variable (either explicit in the
       
   259           syntax tree or because a solution for this variable has been removed
       
   260           due to security filtering)
       
   261         """
       
   262         #assert rqlst.TYPE == 'select', rqlst
       
   263         rqlst.has_text_query = self._annotate_union(rqlst)
       
   264 
       
   265     def _annotate_union(self, union):
       
   266         has_text_query = False
       
   267         for select in union.children:
       
   268             if _annotate_select(self, select):
       
   269                 has_text_query = True
       
   270         return has_text_query
       
   271 
       
   272     def is_ambiguous(self, var):
       
   273         # ignore has_text relation when we know it will be used as principal.
       
   274         # This is expected by the rql2sql generator which will use the `entities`
       
   275         # table to filter out by type if necessary, This optimisation is very
       
   276         # interesting in multi-sources cases, as it may avoid a costly query
       
   277         # on sources to get all entities of a given type to achieve this, while
       
   278         # we have all the necessary information.
       
   279         root = var.stmt.root # Union node
       
   280         # rel.scope -> Select or Exists node, so add .parent to get Union from
       
   281         # Select node
       
   282         rels = [rel for rel in var.stinfo['relations'] if rel.scope.parent is root]
       
   283         if len(rels) == 1 and rels[0].r_type == 'has_text':
       
   284             return False
       
   285         try:
       
   286             data = var.stmt._deamb_data
       
   287         except AttributeError:
       
   288             data = var.stmt._deamb_data = IsAmbData(self.schema, self.nfdomain)
       
   289             data.compute(var.stmt)
       
   290         return data.is_ambiguous(var)
       
   291 
       
   292 
       
   293 class IsAmbData(object):
       
   294     def __init__(self, schema, nfdomain):
       
   295         self.schema = schema
       
   296         # shortcuts
       
   297         self.rschema = schema.rschema
       
   298         self.eschema = schema.eschema
       
   299         # domain for non final variables
       
   300         self.nfdomain = nfdomain
       
   301         # {var: possible solutions set}
       
   302         self.varsols = {}
       
   303         # set of ambiguous variables
       
   304         self.ambiguousvars = set()
       
   305         # remember if a variable has been deambiguified by another to avoid
       
   306         # doing the opposite
       
   307         self.deambification_map = {}
       
   308         # not invariant variables (access to final.inlined relation)
       
   309         self.not_invariants = set()
       
   310 
       
   311     def is_ambiguous(self, var):
       
   312         return var in self.ambiguousvars
       
   313 
       
   314     def restrict(self, var, restricted_domain):
       
   315         self.varsols[var] &= restricted_domain
       
   316         if var in self.ambiguousvars and self.varsols[var] == var.stinfo['possibletypes']:
       
   317             self.ambiguousvars.remove(var)
       
   318 
       
   319     def compute(self, rqlst):
       
   320         # set domains for each variable
       
   321         for varname, var in rqlst.defined_vars.items():
       
   322             if var.stinfo['uidrel'] is not None or \
       
   323                    self.eschema(rqlst.solutions[0][varname]).final:
       
   324                 ptypes = var.stinfo['possibletypes']
       
   325             else:
       
   326                 ptypes = set(self.nfdomain)
       
   327                 self.ambiguousvars.add(var)
       
   328             self.varsols[var] = ptypes
       
   329         if not self.ambiguousvars:
       
   330             return
       
   331         # apply relation restriction
       
   332         self.maydeambrels = maydeambrels = {}
       
   333         for rel in rqlst.iget_nodes(Relation):
       
   334             if rel.r_type == 'eid' or rel.is_types_restriction():
       
   335                 continue
       
   336             lhs, rhs = rel.get_variable_parts()
       
   337             if isinstance(lhs, VariableRef) or isinstance(rhs, VariableRef):
       
   338                 rschema = self.rschema(rel.r_type)
       
   339                 if rschema.inlined or rschema.final:
       
   340                     self.not_invariants.add(lhs.variable)
       
   341                 self.set_rel_constraint(lhs, rel, rschema.subjects)
       
   342                 self.set_rel_constraint(rhs, rel, rschema.objects)
       
   343         # try to deambiguify more variables by considering other variables'type
       
   344         modified = True
       
   345         while modified and self.ambiguousvars:
       
   346             modified = False
       
   347             for var in self.ambiguousvars.copy():
       
   348                 try:
       
   349                     for rel in (var.stinfo['relations'] & maydeambrels[var]):
       
   350                         if self.deambiguifying_relation(var, rel):
       
   351                             modified = True
       
   352                             break
       
   353                 except KeyError:
       
   354                     # no relation to deambiguify
       
   355                     continue
       
   356 
       
   357     def _debug_print(self):
       
   358         print('varsols', dict((x, sorted(str(v) for v in values))
       
   359                                for x, values in self.varsols.items()))
       
   360         print('ambiguous vars', sorted(self.ambiguousvars))
       
   361 
       
   362     def set_rel_constraint(self, term, rel, etypes_func):
       
   363         if isinstance(term, VariableRef) and self.is_ambiguous(term.variable):
       
   364             var = term.variable
       
   365             if len(var.stinfo['relations']) == 1 \
       
   366                    or rel.scope is var.scope or rel.r_type == 'identity':
       
   367                 self.restrict(var, frozenset(etypes_func()))
       
   368                 try:
       
   369                     self.maydeambrels[var].add(rel)
       
   370                 except KeyError:
       
   371                     self.maydeambrels[var] = set((rel,))
       
   372 
       
   373     def deambiguifying_relation(self, var, rel):
       
   374         lhs, rhs = rel.get_variable_parts()
       
   375         onlhs = var is getattr(lhs, 'variable', None)
       
   376         other = onlhs and rhs or lhs
       
   377         otheretypes = None
       
   378         # XXX isinstance(other.variable, Variable) to skip column alias
       
   379         if isinstance(other, VariableRef) and isinstance(other.variable, Variable):
       
   380             deambiguifier = other.variable
       
   381             if not var is self.deambification_map.get(deambiguifier):
       
   382                 if var.stinfo['typerel'] is None:
       
   383                     otheretypes = deambiguifier.stinfo['possibletypes']
       
   384                 elif not self.is_ambiguous(deambiguifier):
       
   385                     otheretypes = self.varsols[deambiguifier]
       
   386                 elif deambiguifier in self.not_invariants:
       
   387                     # we know variable won't be invariant, try to use
       
   388                     # it to deambguify the current variable
       
   389                     otheretypes = self.varsols[deambiguifier]
       
   390             if deambiguifier.stinfo['typerel'] is None:
       
   391                 # if deambiguifier has no type restriction using 'is',
       
   392                 # don't record it
       
   393                 deambiguifier = None
       
   394         elif isinstance(other, Constant) and other.uidtype:
       
   395             otheretypes = (other.uidtype,)
       
   396             deambiguifier = None
       
   397         if otheretypes is not None:
       
   398             # to restrict, we must check that for all type in othertypes,
       
   399             # possible types on the other end of the relation are matching
       
   400             # variable's possible types
       
   401             rschema = self.rschema(rel.r_type)
       
   402             if onlhs:
       
   403                 rtypefunc = rschema.subjects
       
   404             else:
       
   405                 rtypefunc = rschema.objects
       
   406             for otheretype in otheretypes:
       
   407                 reltypes = frozenset(rtypefunc(otheretype))
       
   408                 if var.stinfo['possibletypes'] != reltypes:
       
   409                     return False
       
   410             self.restrict(var, var.stinfo['possibletypes'])
       
   411             self.deambification_map[var] = deambiguifier
       
   412             return True
       
   413         return False