cubicweb/server/rqlannotation.py
changeset 11057 0b59724cb3f2
parent 10669 155c29e0ed1c
child 11703 670aa9bf0b6c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cubicweb/server/rqlannotation.py	Sat Jan 16 13:48:51 2016 +0100
@@ -0,0 +1,413 @@
+# copyright 2003-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
+#
+# This file is part of CubicWeb.
+#
+# CubicWeb is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 2.1 of the License, or (at your option)
+# any later version.
+#
+# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License along
+# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
+"""Functions to add additional annotations on a rql syntax tree to ease later
+code generation.
+"""
+from __future__ import print_function
+
+__docformat__ = "restructuredtext en"
+
+from rql import BadRQLQuery
+from rql.nodes import Relation, VariableRef, Constant, Variable, Or, Exists
+from rql.utils import common_parent
+
+def _annotate_select(annotator, rqlst):
+    has_text_query = False
+    for subquery in rqlst.with_:
+        if annotator._annotate_union(subquery.query):
+            has_text_query = True
+    #if server.DEBUG:
+    #    print '-------- sql annotate', repr(rqlst)
+    getrschema = annotator.schema.rschema
+    for var in rqlst.defined_vars.values():
+        stinfo = var.stinfo
+        if stinfo.get('ftirels'):
+            has_text_query = True
+        if stinfo['attrvar']:
+            stinfo['invariant'] = False
+            stinfo['principal'] = _select_main_var(stinfo['rhsrelations'])
+            continue
+        if not stinfo['relations'] and stinfo['typerel'] is None:
+            # Any X, Any MAX(X)...
+            # those particular queries should be executed using the system
+            # entities table unless there is some type restriction
+            stinfo['invariant'] = True
+            stinfo['principal'] = None
+            continue
+        if any(rel for rel in stinfo['relations'] if rel.r_type == 'eid' and rel.operator() != '=') and \
+               not any(r for r in var.stinfo['relations'] - var.stinfo['rhsrelations']
+                       if r.r_type != 'eid' and (getrschema(r.r_type).inlined or getrschema(r.r_type).final)):
+            # Any X WHERE X eid > 2
+            # those particular queries should be executed using the system entities table
+            stinfo['invariant'] = True
+            stinfo['principal'] = None
+            continue
+        if stinfo['selected'] and var.valuable_references() == 1+bool(stinfo['constnode']):
+            # "Any X", "Any X, Y WHERE X attr Y"
+            stinfo['invariant'] = False
+            continue
+        joins = set()
+        invariant = False
+        for ref in var.references():
+            rel = ref.relation()
+            if rel is None or rel.is_types_restriction():
+                continue
+            lhs, rhs = rel.get_parts()
+            onlhs = ref is lhs
+            role = 'subject' if onlhs else 'object'
+            if rel.r_type == 'eid':
+                if not (onlhs and len(stinfo['relations']) > 1):
+                    break
+                if not stinfo['constnode']:
+                    joins.add( (rel, role) )
+                continue
+            elif rel.r_type == 'identity':
+                # identity can't be used as principal, so check other relation are used
+                # XXX explain rhs.operator == '='
+                if rhs.operator != '=' or len(stinfo['relations']) <= 1: #(stinfo['constnode'] and rhs.operator == '='):
+                    break
+                joins.add( (rel, role) )
+                continue
+            rschema = getrschema(rel.r_type)
+            if rel.optional:
+                if rel in stinfo.get('optrelations', ()):
+                    # optional variable can't be invariant if this is the lhs
+                    # variable of an inlined relation
+                    if not rel in stinfo['rhsrelations'] and rschema.inlined:
+                        break
+                # variable used as main variable of an optional relation can't
+                # be invariant, unless we can use some other relation as
+                # reference for the outer join
+                elif not stinfo['constnode']:
+                    break
+                elif len(stinfo['relations']) == 2:
+                    if onlhs:
+                        ostinfo = rhs.children[0].variable.stinfo
+                    else:
+                        ostinfo = lhs.variable.stinfo
+                    if not (ostinfo.get('optcomparisons') or
+                            any(orel for orel in ostinfo['relations']
+                                if orel.optional and orel is not rel)):
+                        break
+            if rschema.final or (onlhs and rschema.inlined):
+                if rschema.type != 'has_text':
+                    # need join anyway if the variable appears in a final or
+                    # inlined relation
+                    break
+                joins.add( (rel, role) )
+                continue
+            if not stinfo['constnode']:
+                if rschema.inlined and rel.neged(strict=True):
+                    # if relation is inlined, can't be invariant if that
+                    # variable is used anywhere else.
+                    # see 'Any P WHERE NOT N ecrit_par P, N eid 512':
+                    # sql for 'NOT N ecrit_par P' is 'N.ecrit_par is NULL' so P
+                    # can use N.ecrit_par as principal
+                    if (stinfo['selected'] or len(stinfo['relations']) > 1):
+                        break
+            joins.add( (rel, role) )
+        else:
+            # if there is at least one ambigous relation and no other to
+            # restrict types, can't be invariant since we need to filter out
+            # other types
+            if not annotator.is_ambiguous(var):
+                invariant = True
+        stinfo['invariant'] = invariant
+        if invariant and joins:
+            # remember rqlst/solutions analyze information
+            # we have to select a kindof "main" relation which will "extrajoins"
+            # the other
+            # priority should be given to relation which are not in inner queries
+            # (eg exists)
+            try:
+                stinfo['principal'] = principal = _select_principal(var.scope, joins)
+                if getrschema(principal.r_type).inlined:
+                    # the scope of the lhs variable must be equal or outer to the
+                    # rhs variable's scope (since it's retrieved from lhs's table)
+                    sstinfo = principal.children[0].variable.stinfo
+                    sstinfo['scope'] = common_parent(sstinfo['scope'], stinfo['scope']).scope
+            except CantSelectPrincipal:
+                stinfo['invariant'] = False
+    # see unittest_rqlannotation. test_has_text_security_cache_bug
+    # XXX probably more to do, but yet that work without more...
+    for col_alias in rqlst.aliases.values():
+        if col_alias.stinfo.get('ftirels'):
+            has_text_query = True
+    return has_text_query
+
+
+
+class CantSelectPrincipal(Exception):
+    """raised when no 'principal' variable can be found"""
+
+def _select_principal(scope, relations, _sort=lambda x:x):
+    """given a list of rqlst relations, select one which will be used to
+    represent an invariant variable (e.g. using on extremity of the relation
+    instead of the variable's type table
+    """
+    # _sort argument is there for test
+    diffscope_rels = {}
+    ored_rels = set()
+    diffscope_rels = set()
+    for rel, role in _sort(relations):
+        # note: only eid and has_text among all final relations may be there
+        if rel.r_type in ('eid', 'identity'):
+            continue
+        if rel.optional is not None and len(relations) > 1:
+            if role == 'subject' and rel.optional == 'right':
+                continue
+            if role == 'object' and rel.optional == 'left':
+                continue
+        if rel.ored(traverse_scope=True):
+            ored_rels.add(rel)
+        elif rel.scope is scope:
+            return rel
+        elif not rel.neged(traverse_scope=True):
+            diffscope_rels.add(rel)
+    if len(ored_rels) > 1:
+        ored_rels_copy = tuple(ored_rels)
+        for rel1 in ored_rels_copy:
+            for rel2 in ored_rels_copy:
+                if rel1 is rel2:
+                    continue
+                if isinstance(common_parent(rel1, rel2), Or):
+                    ored_rels.discard(rel1)
+                    ored_rels.discard(rel2)
+    for rel in _sort(ored_rels):
+        if rel.scope is scope:
+            return rel
+        diffscope_rels.add(rel)
+    # if DISTINCT query, can use variable from a different scope as principal
+    # since introduced duplicates will be removed
+    if scope.stmt.distinct and diffscope_rels:
+        return next(iter(_sort(diffscope_rels)))
+    # XXX could use a relation from a different scope if it can't generate
+    # duplicates, so we should have to check cardinality
+    raise CantSelectPrincipal()
+
+def _select_main_var(relations):
+    """given a list of rqlst relations, select one which will be used as main
+    relation for the rhs variable
+    """
+    principal = None
+    others = []
+    # sort for test predictability
+    for rel in sorted(relations, key=lambda x: (x.children[0].name, x.r_type)):
+        # only equality relation with a variable as rhs may be principal
+        if rel.operator() not in ('=', 'IS') \
+               or not isinstance(rel.children[1].children[0], VariableRef) or rel.neged(strict=True):
+            continue
+        if rel.optional:
+            others.append(rel)
+            continue
+        if rel.scope is rel.stmt:
+            return rel
+        principal = rel
+    if principal is None:
+        if others:
+            return others[0]
+        raise BadRQLQuery('unable to find principal in %s' % ', '.join(
+            r.as_string() for r in relations))
+    return principal
+
+
+def set_qdata(getrschema, union, noinvariant):
+    """recursive function to set querier data on variables in the syntax tree
+    """
+    for select in union.children:
+        for subquery in select.with_:
+            set_qdata(getrschema, subquery.query, noinvariant)
+        for var in select.defined_vars.values():
+            if var.stinfo['invariant']:
+                if var in noinvariant and not var.stinfo['principal'].r_type == 'has_text':
+                    var._q_invariant = False
+                else:
+                    var._q_invariant = True
+            else:
+                var._q_invariant = False
+
+
+class SQLGenAnnotator(object):
+    def __init__(self, schema):
+        self.schema = schema
+        self.nfdomain = frozenset(eschema.type for eschema in schema.entities()
+                                  if not eschema.final)
+
+    def annotate(self, rqlst):
+        """add information to the rql syntax tree to help sources to do their
+        job (read sql generation)
+
+        a variable is tagged as invariant if:
+        * it's a non final variable
+        * it's not used as lhs in any final or inlined relation
+        * there is no type restriction on this variable (either explicit in the
+          syntax tree or because a solution for this variable has been removed
+          due to security filtering)
+        """
+        #assert rqlst.TYPE == 'select', rqlst
+        rqlst.has_text_query = self._annotate_union(rqlst)
+
+    def _annotate_union(self, union):
+        has_text_query = False
+        for select in union.children:
+            if _annotate_select(self, select):
+                has_text_query = True
+        return has_text_query
+
+    def is_ambiguous(self, var):
+        # ignore has_text relation when we know it will be used as principal.
+        # This is expected by the rql2sql generator which will use the `entities`
+        # table to filter out by type if necessary, This optimisation is very
+        # interesting in multi-sources cases, as it may avoid a costly query
+        # on sources to get all entities of a given type to achieve this, while
+        # we have all the necessary information.
+        root = var.stmt.root # Union node
+        # rel.scope -> Select or Exists node, so add .parent to get Union from
+        # Select node
+        rels = [rel for rel in var.stinfo['relations'] if rel.scope.parent is root]
+        if len(rels) == 1 and rels[0].r_type == 'has_text':
+            return False
+        try:
+            data = var.stmt._deamb_data
+        except AttributeError:
+            data = var.stmt._deamb_data = IsAmbData(self.schema, self.nfdomain)
+            data.compute(var.stmt)
+        return data.is_ambiguous(var)
+
+
+class IsAmbData(object):
+    def __init__(self, schema, nfdomain):
+        self.schema = schema
+        # shortcuts
+        self.rschema = schema.rschema
+        self.eschema = schema.eschema
+        # domain for non final variables
+        self.nfdomain = nfdomain
+        # {var: possible solutions set}
+        self.varsols = {}
+        # set of ambiguous variables
+        self.ambiguousvars = set()
+        # remember if a variable has been deambiguified by another to avoid
+        # doing the opposite
+        self.deambification_map = {}
+        # not invariant variables (access to final.inlined relation)
+        self.not_invariants = set()
+
+    def is_ambiguous(self, var):
+        return var in self.ambiguousvars
+
+    def restrict(self, var, restricted_domain):
+        self.varsols[var] &= restricted_domain
+        if var in self.ambiguousvars and self.varsols[var] == var.stinfo['possibletypes']:
+            self.ambiguousvars.remove(var)
+
+    def compute(self, rqlst):
+        # set domains for each variable
+        for varname, var in rqlst.defined_vars.items():
+            if var.stinfo['uidrel'] is not None or \
+                   self.eschema(rqlst.solutions[0][varname]).final:
+                ptypes = var.stinfo['possibletypes']
+            else:
+                ptypes = set(self.nfdomain)
+                self.ambiguousvars.add(var)
+            self.varsols[var] = ptypes
+        if not self.ambiguousvars:
+            return
+        # apply relation restriction
+        self.maydeambrels = maydeambrels = {}
+        for rel in rqlst.iget_nodes(Relation):
+            if rel.r_type == 'eid' or rel.is_types_restriction():
+                continue
+            lhs, rhs = rel.get_variable_parts()
+            if isinstance(lhs, VariableRef) or isinstance(rhs, VariableRef):
+                rschema = self.rschema(rel.r_type)
+                if rschema.inlined or rschema.final:
+                    self.not_invariants.add(lhs.variable)
+                self.set_rel_constraint(lhs, rel, rschema.subjects)
+                self.set_rel_constraint(rhs, rel, rschema.objects)
+        # try to deambiguify more variables by considering other variables'type
+        modified = True
+        while modified and self.ambiguousvars:
+            modified = False
+            for var in self.ambiguousvars.copy():
+                try:
+                    for rel in (var.stinfo['relations'] & maydeambrels[var]):
+                        if self.deambiguifying_relation(var, rel):
+                            modified = True
+                            break
+                except KeyError:
+                    # no relation to deambiguify
+                    continue
+
+    def _debug_print(self):
+        print('varsols', dict((x, sorted(str(v) for v in values))
+                               for x, values in self.varsols.items()))
+        print('ambiguous vars', sorted(self.ambiguousvars))
+
+    def set_rel_constraint(self, term, rel, etypes_func):
+        if isinstance(term, VariableRef) and self.is_ambiguous(term.variable):
+            var = term.variable
+            if len(var.stinfo['relations']) == 1 \
+                   or rel.scope is var.scope or rel.r_type == 'identity':
+                self.restrict(var, frozenset(etypes_func()))
+                try:
+                    self.maydeambrels[var].add(rel)
+                except KeyError:
+                    self.maydeambrels[var] = set((rel,))
+
+    def deambiguifying_relation(self, var, rel):
+        lhs, rhs = rel.get_variable_parts()
+        onlhs = var is getattr(lhs, 'variable', None)
+        other = onlhs and rhs or lhs
+        otheretypes = None
+        # XXX isinstance(other.variable, Variable) to skip column alias
+        if isinstance(other, VariableRef) and isinstance(other.variable, Variable):
+            deambiguifier = other.variable
+            if not var is self.deambification_map.get(deambiguifier):
+                if var.stinfo['typerel'] is None:
+                    otheretypes = deambiguifier.stinfo['possibletypes']
+                elif not self.is_ambiguous(deambiguifier):
+                    otheretypes = self.varsols[deambiguifier]
+                elif deambiguifier in self.not_invariants:
+                    # we know variable won't be invariant, try to use
+                    # it to deambguify the current variable
+                    otheretypes = self.varsols[deambiguifier]
+            if deambiguifier.stinfo['typerel'] is None:
+                # if deambiguifier has no type restriction using 'is',
+                # don't record it
+                deambiguifier = None
+        elif isinstance(other, Constant) and other.uidtype:
+            otheretypes = (other.uidtype,)
+            deambiguifier = None
+        if otheretypes is not None:
+            # to restrict, we must check that for all type in othertypes,
+            # possible types on the other end of the relation are matching
+            # variable's possible types
+            rschema = self.rschema(rel.r_type)
+            if onlhs:
+                rtypefunc = rschema.subjects
+            else:
+                rtypefunc = rschema.objects
+            for otheretype in otheretypes:
+                reltypes = frozenset(rtypefunc(otheretype))
+                if var.stinfo['possibletypes'] != reltypes:
+                    return False
+            self.restrict(var, var.stinfo['possibletypes'])
+            self.deambification_map[var] = deambiguifier
+            return True
+        return False