diff -r 058bb3dc685f -r 0b59724cb3f2 server/rqlannotation.py --- a/server/rqlannotation.py Mon Jan 04 18:40:30 2016 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,413 +0,0 @@ -# copyright 2003-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved. -# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr -# -# This file is part of CubicWeb. -# -# CubicWeb is free software: you can redistribute it and/or modify it under the -# terms of the GNU Lesser General Public License as published by the Free -# Software Foundation, either version 2.1 of the License, or (at your option) -# any later version. -# -# CubicWeb is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more -# details. -# -# You should have received a copy of the GNU Lesser General Public License along -# with CubicWeb. If not, see . -"""Functions to add additional annotations on a rql syntax tree to ease later -code generation. -""" -from __future__ import print_function - -__docformat__ = "restructuredtext en" - -from rql import BadRQLQuery -from rql.nodes import Relation, VariableRef, Constant, Variable, Or, Exists -from rql.utils import common_parent - -def _annotate_select(annotator, rqlst): - has_text_query = False - for subquery in rqlst.with_: - if annotator._annotate_union(subquery.query): - has_text_query = True - #if server.DEBUG: - # print '-------- sql annotate', repr(rqlst) - getrschema = annotator.schema.rschema - for var in rqlst.defined_vars.values(): - stinfo = var.stinfo - if stinfo.get('ftirels'): - has_text_query = True - if stinfo['attrvar']: - stinfo['invariant'] = False - stinfo['principal'] = _select_main_var(stinfo['rhsrelations']) - continue - if not stinfo['relations'] and stinfo['typerel'] is None: - # Any X, Any MAX(X)... - # those particular queries should be executed using the system - # entities table unless there is some type restriction - stinfo['invariant'] = True - stinfo['principal'] = None - continue - if any(rel for rel in stinfo['relations'] if rel.r_type == 'eid' and rel.operator() != '=') and \ - not any(r for r in var.stinfo['relations'] - var.stinfo['rhsrelations'] - if r.r_type != 'eid' and (getrschema(r.r_type).inlined or getrschema(r.r_type).final)): - # Any X WHERE X eid > 2 - # those particular queries should be executed using the system entities table - stinfo['invariant'] = True - stinfo['principal'] = None - continue - if stinfo['selected'] and var.valuable_references() == 1+bool(stinfo['constnode']): - # "Any X", "Any X, Y WHERE X attr Y" - stinfo['invariant'] = False - continue - joins = set() - invariant = False - for ref in var.references(): - rel = ref.relation() - if rel is None or rel.is_types_restriction(): - continue - lhs, rhs = rel.get_parts() - onlhs = ref is lhs - role = 'subject' if onlhs else 'object' - if rel.r_type == 'eid': - if not (onlhs and len(stinfo['relations']) > 1): - break - if not stinfo['constnode']: - joins.add( (rel, role) ) - continue - elif rel.r_type == 'identity': - # identity can't be used as principal, so check other relation are used - # XXX explain rhs.operator == '=' - if rhs.operator != '=' or len(stinfo['relations']) <= 1: #(stinfo['constnode'] and rhs.operator == '='): - break - joins.add( (rel, role) ) - continue - rschema = getrschema(rel.r_type) - if rel.optional: - if rel in stinfo.get('optrelations', ()): - # optional variable can't be invariant if this is the lhs - # variable of an inlined relation - if not rel in stinfo['rhsrelations'] and rschema.inlined: - break - # variable used as main variable of an optional relation can't - # be invariant, unless we can use some other relation as - # reference for the outer join - elif not stinfo['constnode']: - break - elif len(stinfo['relations']) == 2: - if onlhs: - ostinfo = rhs.children[0].variable.stinfo - else: - ostinfo = lhs.variable.stinfo - if not (ostinfo.get('optcomparisons') or - any(orel for orel in ostinfo['relations'] - if orel.optional and orel is not rel)): - break - if rschema.final or (onlhs and rschema.inlined): - if rschema.type != 'has_text': - # need join anyway if the variable appears in a final or - # inlined relation - break - joins.add( (rel, role) ) - continue - if not stinfo['constnode']: - if rschema.inlined and rel.neged(strict=True): - # if relation is inlined, can't be invariant if that - # variable is used anywhere else. - # see 'Any P WHERE NOT N ecrit_par P, N eid 512': - # sql for 'NOT N ecrit_par P' is 'N.ecrit_par is NULL' so P - # can use N.ecrit_par as principal - if (stinfo['selected'] or len(stinfo['relations']) > 1): - break - joins.add( (rel, role) ) - else: - # if there is at least one ambigous relation and no other to - # restrict types, can't be invariant since we need to filter out - # other types - if not annotator.is_ambiguous(var): - invariant = True - stinfo['invariant'] = invariant - if invariant and joins: - # remember rqlst/solutions analyze information - # we have to select a kindof "main" relation which will "extrajoins" - # the other - # priority should be given to relation which are not in inner queries - # (eg exists) - try: - stinfo['principal'] = principal = _select_principal(var.scope, joins) - if getrschema(principal.r_type).inlined: - # the scope of the lhs variable must be equal or outer to the - # rhs variable's scope (since it's retrieved from lhs's table) - sstinfo = principal.children[0].variable.stinfo - sstinfo['scope'] = common_parent(sstinfo['scope'], stinfo['scope']).scope - except CantSelectPrincipal: - stinfo['invariant'] = False - # see unittest_rqlannotation. test_has_text_security_cache_bug - # XXX probably more to do, but yet that work without more... - for col_alias in rqlst.aliases.values(): - if col_alias.stinfo.get('ftirels'): - has_text_query = True - return has_text_query - - - -class CantSelectPrincipal(Exception): - """raised when no 'principal' variable can be found""" - -def _select_principal(scope, relations, _sort=lambda x:x): - """given a list of rqlst relations, select one which will be used to - represent an invariant variable (e.g. using on extremity of the relation - instead of the variable's type table - """ - # _sort argument is there for test - diffscope_rels = {} - ored_rels = set() - diffscope_rels = set() - for rel, role in _sort(relations): - # note: only eid and has_text among all final relations may be there - if rel.r_type in ('eid', 'identity'): - continue - if rel.optional is not None and len(relations) > 1: - if role == 'subject' and rel.optional == 'right': - continue - if role == 'object' and rel.optional == 'left': - continue - if rel.ored(traverse_scope=True): - ored_rels.add(rel) - elif rel.scope is scope: - return rel - elif not rel.neged(traverse_scope=True): - diffscope_rels.add(rel) - if len(ored_rels) > 1: - ored_rels_copy = tuple(ored_rels) - for rel1 in ored_rels_copy: - for rel2 in ored_rels_copy: - if rel1 is rel2: - continue - if isinstance(common_parent(rel1, rel2), Or): - ored_rels.discard(rel1) - ored_rels.discard(rel2) - for rel in _sort(ored_rels): - if rel.scope is scope: - return rel - diffscope_rels.add(rel) - # if DISTINCT query, can use variable from a different scope as principal - # since introduced duplicates will be removed - if scope.stmt.distinct and diffscope_rels: - return next(iter(_sort(diffscope_rels))) - # XXX could use a relation from a different scope if it can't generate - # duplicates, so we should have to check cardinality - raise CantSelectPrincipal() - -def _select_main_var(relations): - """given a list of rqlst relations, select one which will be used as main - relation for the rhs variable - """ - principal = None - others = [] - # sort for test predictability - for rel in sorted(relations, key=lambda x: (x.children[0].name, x.r_type)): - # only equality relation with a variable as rhs may be principal - if rel.operator() not in ('=', 'IS') \ - or not isinstance(rel.children[1].children[0], VariableRef) or rel.neged(strict=True): - continue - if rel.optional: - others.append(rel) - continue - if rel.scope is rel.stmt: - return rel - principal = rel - if principal is None: - if others: - return others[0] - raise BadRQLQuery('unable to find principal in %s' % ', '.join( - r.as_string() for r in relations)) - return principal - - -def set_qdata(getrschema, union, noinvariant): - """recursive function to set querier data on variables in the syntax tree - """ - for select in union.children: - for subquery in select.with_: - set_qdata(getrschema, subquery.query, noinvariant) - for var in select.defined_vars.values(): - if var.stinfo['invariant']: - if var in noinvariant and not var.stinfo['principal'].r_type == 'has_text': - var._q_invariant = False - else: - var._q_invariant = True - else: - var._q_invariant = False - - -class SQLGenAnnotator(object): - def __init__(self, schema): - self.schema = schema - self.nfdomain = frozenset(eschema.type for eschema in schema.entities() - if not eschema.final) - - def annotate(self, rqlst): - """add information to the rql syntax tree to help sources to do their - job (read sql generation) - - a variable is tagged as invariant if: - * it's a non final variable - * it's not used as lhs in any final or inlined relation - * there is no type restriction on this variable (either explicit in the - syntax tree or because a solution for this variable has been removed - due to security filtering) - """ - #assert rqlst.TYPE == 'select', rqlst - rqlst.has_text_query = self._annotate_union(rqlst) - - def _annotate_union(self, union): - has_text_query = False - for select in union.children: - if _annotate_select(self, select): - has_text_query = True - return has_text_query - - def is_ambiguous(self, var): - # ignore has_text relation when we know it will be used as principal. - # This is expected by the rql2sql generator which will use the `entities` - # table to filter out by type if necessary, This optimisation is very - # interesting in multi-sources cases, as it may avoid a costly query - # on sources to get all entities of a given type to achieve this, while - # we have all the necessary information. - root = var.stmt.root # Union node - # rel.scope -> Select or Exists node, so add .parent to get Union from - # Select node - rels = [rel for rel in var.stinfo['relations'] if rel.scope.parent is root] - if len(rels) == 1 and rels[0].r_type == 'has_text': - return False - try: - data = var.stmt._deamb_data - except AttributeError: - data = var.stmt._deamb_data = IsAmbData(self.schema, self.nfdomain) - data.compute(var.stmt) - return data.is_ambiguous(var) - - -class IsAmbData(object): - def __init__(self, schema, nfdomain): - self.schema = schema - # shortcuts - self.rschema = schema.rschema - self.eschema = schema.eschema - # domain for non final variables - self.nfdomain = nfdomain - # {var: possible solutions set} - self.varsols = {} - # set of ambiguous variables - self.ambiguousvars = set() - # remember if a variable has been deambiguified by another to avoid - # doing the opposite - self.deambification_map = {} - # not invariant variables (access to final.inlined relation) - self.not_invariants = set() - - def is_ambiguous(self, var): - return var in self.ambiguousvars - - def restrict(self, var, restricted_domain): - self.varsols[var] &= restricted_domain - if var in self.ambiguousvars and self.varsols[var] == var.stinfo['possibletypes']: - self.ambiguousvars.remove(var) - - def compute(self, rqlst): - # set domains for each variable - for varname, var in rqlst.defined_vars.items(): - if var.stinfo['uidrel'] is not None or \ - self.eschema(rqlst.solutions[0][varname]).final: - ptypes = var.stinfo['possibletypes'] - else: - ptypes = set(self.nfdomain) - self.ambiguousvars.add(var) - self.varsols[var] = ptypes - if not self.ambiguousvars: - return - # apply relation restriction - self.maydeambrels = maydeambrels = {} - for rel in rqlst.iget_nodes(Relation): - if rel.r_type == 'eid' or rel.is_types_restriction(): - continue - lhs, rhs = rel.get_variable_parts() - if isinstance(lhs, VariableRef) or isinstance(rhs, VariableRef): - rschema = self.rschema(rel.r_type) - if rschema.inlined or rschema.final: - self.not_invariants.add(lhs.variable) - self.set_rel_constraint(lhs, rel, rschema.subjects) - self.set_rel_constraint(rhs, rel, rschema.objects) - # try to deambiguify more variables by considering other variables'type - modified = True - while modified and self.ambiguousvars: - modified = False - for var in self.ambiguousvars.copy(): - try: - for rel in (var.stinfo['relations'] & maydeambrels[var]): - if self.deambiguifying_relation(var, rel): - modified = True - break - except KeyError: - # no relation to deambiguify - continue - - def _debug_print(self): - print('varsols', dict((x, sorted(str(v) for v in values)) - for x, values in self.varsols.items())) - print('ambiguous vars', sorted(self.ambiguousvars)) - - def set_rel_constraint(self, term, rel, etypes_func): - if isinstance(term, VariableRef) and self.is_ambiguous(term.variable): - var = term.variable - if len(var.stinfo['relations']) == 1 \ - or rel.scope is var.scope or rel.r_type == 'identity': - self.restrict(var, frozenset(etypes_func())) - try: - self.maydeambrels[var].add(rel) - except KeyError: - self.maydeambrels[var] = set((rel,)) - - def deambiguifying_relation(self, var, rel): - lhs, rhs = rel.get_variable_parts() - onlhs = var is getattr(lhs, 'variable', None) - other = onlhs and rhs or lhs - otheretypes = None - # XXX isinstance(other.variable, Variable) to skip column alias - if isinstance(other, VariableRef) and isinstance(other.variable, Variable): - deambiguifier = other.variable - if not var is self.deambification_map.get(deambiguifier): - if var.stinfo['typerel'] is None: - otheretypes = deambiguifier.stinfo['possibletypes'] - elif not self.is_ambiguous(deambiguifier): - otheretypes = self.varsols[deambiguifier] - elif deambiguifier in self.not_invariants: - # we know variable won't be invariant, try to use - # it to deambguify the current variable - otheretypes = self.varsols[deambiguifier] - if deambiguifier.stinfo['typerel'] is None: - # if deambiguifier has no type restriction using 'is', - # don't record it - deambiguifier = None - elif isinstance(other, Constant) and other.uidtype: - otheretypes = (other.uidtype,) - deambiguifier = None - if otheretypes is not None: - # to restrict, we must check that for all type in othertypes, - # possible types on the other end of the relation are matching - # variable's possible types - rschema = self.rschema(rel.r_type) - if onlhs: - rtypefunc = rschema.subjects - else: - rtypefunc = rschema.objects - for otheretype in otheretypes: - reltypes = frozenset(rtypefunc(otheretype)) - if var.stinfo['possibletypes'] != reltypes: - return False - self.restrict(var, var.stinfo['possibletypes']) - self.deambification_map[var] = deambiguifier - return True - return False