server/rqlannotation.py
changeset 0 b97547f5f1fa
child 339 c0a0ce6c0428
equal deleted inserted replaced
-1:000000000000 0:b97547f5f1fa
       
     1 """Functions to add additional annotations on a rql syntax tree to ease later
       
     2 code generation.
       
     3 
       
     4 :organization: Logilab
       
     5 :copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
       
     6 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
       
     7 """
       
     8 __docformat__ = "restructuredtext en"
       
     9 
       
    10 from logilab.common.compat import any
       
    11 
       
    12 from rql.nodes import Relation, Exists, VariableRef, Constant, Variable, Or
       
    13 from rql.utils import common_parent
       
    14 
       
    15 from cubicweb import server
       
    16 
       
def _annotate_select(annotator, rqlst):
    """Annotate one select node of the syntax tree to ease sql generation.

    Sub-queries found in `rqlst.with_` are annotated first, through
    `annotator._annotate_union`. Then, on `rqlst` itself:

    * `rqlst.need_distinct` is set (it starts from `rqlst.distinct` and is
      forced to True by some negated or symetric relations, see below)
    * each variable of `rqlst.defined_vars` gets its `stinfo['invariant']`
      flag set and, in some branches, its `stinfo['principal']` relation

    :param annotator: object providing `schema.rschema`, `_annotate_union`
      and `is_ambiguous` (a `SQLGenAnnotator` in this module)
    :param rqlst: the select node to annotate
    :return: True if at least one defined variable has a non empty
      `stinfo['ftirels']`, else False
    """
    for subquery in rqlst.with_:
        annotator._annotate_union(subquery.query)
    #if server.DEBUG:
    #    print '-------- sql annotate', repr(rqlst)
    getrschema = annotator.schema.rschema
    has_text_query = False
    need_distinct = rqlst.distinct
    # first pass, over relations: find out if DISTINCT has to be forced
    for rel in rqlst.iget_nodes(Relation):
        if rel.neged(strict=True):
            if rel.is_types_restriction():
                need_distinct = True
            else:
                rschema = getrschema(rel.r_type)
                if rschema.inlined:
                    try:
                        var = rel.children[1].children[0].variable
                    except AttributeError:
                        pass # rewritten variable
                    else:
                        if not var.stinfo['constnode']:
                            need_distinct = True
        elif getrschema(rel.r_type).symetric:
            for vref in rel.iget_nodes(VariableRef):
                stinfo = vref.variable.stinfo
                if not stinfo['constnode'] and stinfo['selected']:
                    need_distinct = True
                    # XXX could mark as not invariant
                    break
    # second pass, over variables: compute the 'invariant' flag and, where
    # relevant, the 'principal' relation of each of them
    for name, var in rqlst.defined_vars.items():
        stinfo = var.stinfo
        if stinfo.get('ftirels'):
            has_text_query = True
        if stinfo['attrvar']:
            # attribute variable: never invariant, principal is picked among
            # the relations where it appears on the right hand side
            stinfo['invariant'] = False
            stinfo['principal'] = _select_main_var(stinfo['rhsrelations'])
            continue
        if not stinfo['relations'] and not stinfo['typerels']:
            # Any X, Any MAX(X)...
            # those particular queries should be executed using the system
            # entities table unless there is some type restriction
            stinfo['invariant'] = True
            stinfo['principal'] = None
            continue
        if any(rel for rel in stinfo['relations'] if rel.r_type == 'eid' and rel.operator() != '=') and \
               not any(r for r in var.stinfo['relations'] - var.stinfo['rhsrelations']
                       if r.r_type != 'eid' and (getrschema(r.r_type).inlined or getrschema(r.r_type).final)):
            # Any X WHERE X eid > 2
            # those particular queries should be executed using the system entities table
            stinfo['invariant'] = True
            stinfo['principal'] = None
            continue
        if stinfo['selected'] and var.valuable_references() == 1+bool(stinfo['constnode']):
            # "Any X", "Any X, Y WHERE X attr Y"
            stinfo['invariant'] = False
            continue
        # relations which may be used as principal if the variable turns out
        # to be invariant
        joins = set()
        invariant = False
        # any `break` below means "this variable can't be invariant": it
        # skips the `else` clause of the loop, leaving `invariant` to False
        for ref in var.references():
            rel = ref.relation()
            if rel is None or rel.is_types_restriction():
                continue
            lhs, rhs = rel.get_parts()
            onlhs = ref is lhs
            if rel.r_type == 'eid':
                if not (onlhs and len(stinfo['relations']) > 1):
                    break
                if not stinfo['constnode']:
                    joins.add(rel)
                continue
            elif rel.r_type == 'identity':
                # identity can't be used as principal, so check other relation are used
                # XXX explain rhs.operator == '='
                if rhs.operator != '=' or len(stinfo['relations']) <= 1: #(stinfo['constnode'] and rhs.operator == '='):
                    break
                joins.add(rel)
                continue
            rschema = getrschema(rel.r_type)
            if rel.optional:
                if rel in stinfo['optrelations']:
                    # optional variable can't be invariant if this is the lhs
                    # variable of an inlined relation
                    if not rel in stinfo['rhsrelations'] and rschema.inlined:
                        break
                else:
                    # variable used as main variable of an optional relation
                    # can't be invariant
                    break
            if rschema.final or (onlhs and rschema.inlined):
                if rschema.type != 'has_text':
                    # need join anyway if the variable appears in a final or
                    # inlined relation
                    break
                joins.add(rel)
                continue
            if not stinfo['constnode']:
                if rschema.inlined and rel.neged(strict=True):
                    # if relation is inlined, can't be invariant if that
                    # variable is used anywhere else.
                    # see 'Any P WHERE NOT N ecrit_par P, N eid 512':
                    # sql for 'NOT N ecrit_par P' is 'N.ecrit_par is NULL' so P
                    # can use N.ecrit_par as principal
                    if (stinfo['selected'] or len(stinfo['relations']) > 1):
                        break
                elif rschema.symetric and stinfo['selected']:
                    break
            joins.add(rel)
        else:
            # reached only when no reference triggered a `break` above.
            # if there is at least one ambiguous relation and no other to
            # restrict types, can't be invariant since we need to filter out
            # other types
            if not annotator.is_ambiguous(var):
                invariant = True
        stinfo['invariant'] = invariant
        if invariant and joins:
            # remember rqlst/solutions analyze information
            # we have to select a kind of "main" relation which will
            # "extrajoins" the other
            # priority should be given to relation which are not in inner queries
            # (eg exists)
            try:
                stinfo['principal'] = _select_principal(var.sqlscope, joins)
            except CantSelectPrincipal:
                # no usable principal relation: fall back to not invariant
                stinfo['invariant'] = False
    rqlst.need_distinct = need_distinct
    return has_text_query
       
   143 
       
   144 
       
   145 
       
   146 class CantSelectPrincipal(Exception): pass
       
   147 
       
   148 def _select_principal(sqlscope, relations):
       
   149     """given a list of rqlst relations, select one which will be used to
       
   150     represent an invariant variable (e.g. using on extremity of the relation
       
   151     instead of the variable's type table
       
   152     """
       
   153     diffscope_rels = {}
       
   154     has_same_scope_rel = False
       
   155     ored_rels = set()
       
   156     diffscope_rels = set()
       
   157     for rel in relations:
       
   158         # note: only eid and has_text among all final relations may be there
       
   159         if rel.r_type in ('eid', 'identity'):
       
   160             has_same_scope_rel = rel.sqlscope is sqlscope
       
   161             continue
       
   162         if rel.ored(traverse_scope=True):
       
   163             ored_rels.add(rel)
       
   164         elif rel.sqlscope is sqlscope:
       
   165             return rel
       
   166         elif not rel.neged(traverse_scope=True):
       
   167             diffscope_rels.add(rel)
       
   168     if len(ored_rels) > 1:
       
   169         ored_rels_copy = tuple(ored_rels)
       
   170         for rel1 in ored_rels_copy:
       
   171             for rel2 in ored_rels_copy:
       
   172                 if rel1 is rel2:
       
   173                     continue
       
   174                 if isinstance(common_parent(rel1, rel2), Or):
       
   175                     ored_rels.discard(rel1)
       
   176                     ored_rels.discard(rel2)
       
   177     for rel in ored_rels:
       
   178         if rel.sqlscope is sqlscope:
       
   179             return rel
       
   180         diffscope_rels.add(rel)
       
   181     # if DISTINCT query, can use variable from a different scope as principal
       
   182     # since introduced duplicates will be removed
       
   183     if sqlscope.stmt.distinct and diffscope_rels:
       
   184         return iter(diffscope_rels).next()
       
   185     # XXX  could use a relation for a different scope if it can't generate
       
   186     # duplicates, so we would have to check cardinality
       
   187     raise CantSelectPrincipal()
       
   188     
       
   189 
       
   190 def _select_main_var(relations):
       
   191     """given a list of rqlst relations, select one which will be used as main
       
   192     relation for the rhs variable
       
   193     """
       
   194     for rel in relations:
       
   195         if rel.sqlscope is rel.stmt:
       
   196             return rel
       
   197         principal = rel
       
   198     return principal
       
   199 
       
   200 
       
   201 def set_qdata(union, noinvariant):
       
   202     """recursive function to set querier data on variables in the syntax tree
       
   203     """
       
   204     for select in union.children:
       
   205         for subquery in select.with_:
       
   206             set_qdata(subquery.query, noinvariant)
       
   207         for var in select.defined_vars.itervalues():
       
   208             if var.stinfo['invariant']:
       
   209                 if var in noinvariant and not var.stinfo['principal'].r_type == 'has_text':
       
   210                     var._q_invariant = False
       
   211                 else:
       
   212                     var._q_invariant = True
       
   213             else:
       
   214                 var._q_invariant = False
       
   215 
       
   216 
       
   217 class SQLGenAnnotator(object):
       
   218     def __init__(self, schema):
       
   219         self.schema = schema
       
   220         self.nfdomain = frozenset(eschema.type for eschema in schema.entities()
       
   221                                   if not eschema.is_final())
       
   222 
       
   223     def annotate(self, rqlst):
       
   224         """add information to the rql syntax tree to help sources to do their
       
   225         job (read sql generation)
       
   226 
       
   227         a variable is tagged as invariant if:
       
   228         * it's a non final variable
       
   229         * it's not used as lhs in any final or inlined relation
       
   230         * there is no type restriction on this variable (either explicit in the
       
   231           syntax tree or because a solution for this variable has been removed
       
   232           due to security filtering)
       
   233         """
       
   234         assert rqlst.TYPE == 'select', rqlst
       
   235         rqlst.has_text_query = self._annotate_union(rqlst)
       
   236 
       
   237     def _annotate_union(self, union):
       
   238         has_text_query = False
       
   239         for select in union.children:
       
   240             htq = _annotate_select(self, select)
       
   241             if htq:
       
   242                 has_text_query = True
       
   243         return has_text_query
       
   244 
       
   245 
       
   246     def is_ambiguous(self, var):
       
   247         # ignore has_text relation
       
   248         if len([rel for rel in var.stinfo['relations']
       
   249                 if rel.sqlscope is var.sqlscope and rel.r_type == 'has_text']) == 1:
       
   250             return False
       
   251         try:
       
   252             data = var.stmt._deamb_data
       
   253         except AttributeError: 
       
   254             data = var.stmt._deamb_data = IsAmbData(self.schema, self.nfdomain)
       
   255             data.compute(var.stmt)
       
   256         return data.is_ambiguous(var)
       
   257 
       
   258         
       
   259 class IsAmbData(object):
       
   260     def __init__(self, schema, nfdomain):
       
   261         self.schema = schema
       
   262         # shortcuts
       
   263         self.rschema = schema.rschema
       
   264         self.eschema = schema.eschema
       
   265         # domain for non final variables
       
   266         self.nfdomain = nfdomain
       
   267         # {var: possible solutions set}
       
   268         self.varsols = {}
       
   269         # set of ambiguous variables
       
   270         self.ambiguousvars = set()
       
   271         # remember if a variable has been deambiguified by another to avoid
       
   272         # doing the opposite
       
   273         self.deambification_map = {}
       
   274         # not invariant variables (access to final.inlined relation)
       
   275         self.not_invariants = set()
       
   276         
       
   277     def is_ambiguous(self, var):
       
   278         return var in self.ambiguousvars
       
   279 
       
   280     def restrict(self, var, restricted_domain):
       
   281         self.varsols[var] &= restricted_domain
       
   282         if var in self.ambiguousvars and self.varsols[var] == var.stinfo['possibletypes']:
       
   283             self.ambiguousvars.remove(var)
       
   284     
       
   285     def compute(self, rqlst):
       
   286         # set domains for each variable
       
   287         for varname, var in rqlst.defined_vars.iteritems():
       
   288             if var.stinfo['uidrels'] or \
       
   289                    self.eschema(rqlst.solutions[0][varname]).is_final():
       
   290                 ptypes = var.stinfo['possibletypes']
       
   291             else:
       
   292                 ptypes = set(self.nfdomain)
       
   293                 self.ambiguousvars.add(var)
       
   294             self.varsols[var] = ptypes
       
   295         if not self.ambiguousvars:
       
   296             return
       
   297         # apply relation restriction
       
   298         self.maydeambrels = maydeambrels = {}
       
   299         for rel in rqlst.iget_nodes(Relation):
       
   300             if rel.is_types_restriction() or rel.r_type == 'eid':
       
   301                 continue
       
   302             lhs, rhs = rel.get_variable_parts()
       
   303             if isinstance(lhs, VariableRef) or isinstance(rhs, VariableRef):
       
   304                 rschema = self.rschema(rel.r_type)
       
   305                 if rschema.inlined or rschema.is_final():
       
   306                     self.not_invariants.add(lhs.variable)
       
   307                 self.set_rel_constraint(lhs, rel, rschema.subjects)
       
   308                 self.set_rel_constraint(rhs, rel, rschema.objects)
       
   309         # try to deambiguify more variables by considering other variables'type
       
   310         modified = True
       
   311         while modified and self.ambiguousvars:
       
   312             modified = False
       
   313             for var in self.ambiguousvars.copy():
       
   314                 try:
       
   315                     for rel in (var.stinfo['relations'] & maydeambrels[var]):
       
   316                         if self.deambiguifying_relation(var, rel):
       
   317                             modified = True
       
   318                             break
       
   319                 except KeyError:
       
   320                     # no relation to deambiguify
       
   321                     continue
       
   322 
       
   323     def _debug_print(self):
       
   324         print 'varsols', dict((x, sorted(str(v) for v in values))
       
   325                                for x, values in self.varsols.iteritems())
       
   326         print 'ambiguous vars', sorted(self.ambiguousvars)
       
   327 
       
   328     def set_rel_constraint(self, term, rel, etypes_func):
       
   329         if isinstance(term, VariableRef) and self.is_ambiguous(term.variable):
       
   330             var = term.variable
       
   331             if len(var.stinfo['relations'] - var.stinfo['typerels']) == 1 \
       
   332                    or rel.sqlscope is var.sqlscope:
       
   333                 self.restrict(var, frozenset(etypes_func()))
       
   334                 try:
       
   335                     self.maydeambrels[var].add(rel)
       
   336                 except KeyError:
       
   337                     self.maydeambrels[var] = set((rel,))
       
   338         
       
   339     def deambiguifying_relation(self, var, rel):
       
   340         lhs, rhs = rel.get_variable_parts()
       
   341         onlhs = var is getattr(lhs, 'variable', None)
       
   342         other = onlhs and rhs or lhs
       
   343         otheretypes = None
       
   344         # XXX isinstance(other.variable, Variable) to skip column alias
       
   345         if isinstance(other, VariableRef) and isinstance(other.variable, Variable):
       
   346             deambiguifier = other.variable
       
   347             if not var is self.deambification_map.get(deambiguifier):
       
   348                 if not var.stinfo['typerels']:
       
   349                     otheretypes = deambiguifier.stinfo['possibletypes']
       
   350                 elif not self.is_ambiguous(deambiguifier):
       
   351                     otheretypes = self.varsols[deambiguifier]
       
   352                 elif deambiguifier in self.not_invariants:
       
   353                     # we know variable won't be invariant, try to use
       
   354                     # it to deambguify the current variable
       
   355                     otheretypes = self.varsols[deambiguifier]
       
   356         elif isinstance(other, Constant) and other.uidtype:
       
   357             otheretypes = (other.uidtype,)
       
   358             deambiguifier = None
       
   359         if otheretypes is not None:
       
   360             # unless types for variable are already non-ambigous, check
       
   361             # if this relation has some type ambiguity
       
   362             rschema = self.rschema(rel.r_type)
       
   363             if onlhs:
       
   364                 rtypefunc = rschema.subjects
       
   365             else:
       
   366                 rtypefunc = rschema.objects
       
   367             for otheretype in otheretypes:
       
   368                 reltypes = frozenset(rtypefunc(otheretype))
       
   369                 if var.stinfo['possibletypes'] != reltypes:
       
   370                     break
       
   371                 self.restrict(var, reltypes)
       
   372                 self.deambification_map[var] = deambiguifier
       
   373                 return True
       
   374         return False