Add a little in FAQ and HOWTO.
"""Functions to add additional annotations on a rql syntax tree to ease later
code generation.
:organization: Logilab
:copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
"""
__docformat__ = "restructuredtext en"
from logilab.common.compat import any
from rql.nodes import Relation, Exists, VariableRef, Constant, Variable, Or
from rql.utils import common_parent
from cubicweb import server
def _annotate_select(annotator, rqlst):
    """Annotate the variables of a single select node to ease SQL generation.

    Subqueries listed in `rqlst.with_` are annotated first, through
    `annotator._annotate_union`. Then every variable of `rqlst.defined_vars`
    gets its `stinfo['invariant']` flag set and, on some paths, its
    `stinfo['principal']` relation. Finally `rqlst.need_distinct` is set.

    :param annotator: object giving access to `schema.rschema`,
      `_annotate_union` and `is_ambiguous` (see `SQLGenAnnotator`)
    :param rqlst: the select node to annotate
    :return: True if at least one variable defined by this select has a non
      empty `stinfo['ftirels']`, else False (values returned for subqueries
      are not taken into account here)
    """
    for subquery in rqlst.with_:
        annotator._annotate_union(subquery.query)
    #if server.DEBUG:
    #    print '-------- sql annotate', repr(rqlst)
    getrschema = annotator.schema.rschema
    has_text_query = False
    need_distinct = rqlst.distinct
    # first pass: walk relations to detect the ones forcing DISTINCT
    for rel in rqlst.iget_nodes(Relation):
        if rel.neged(strict=True):
            if rel.is_types_restriction():
                need_distinct = True
            else:
                rschema = getrschema(rel.r_type)
                if rschema.inlined:
                    try:
                        var = rel.children[1].children[0].variable
                    except AttributeError:
                        pass # rewritten variable
                    else:
                        if not var.stinfo['constnode']:
                            need_distinct = True
        elif getrschema(rel.r_type).symetric:
            for vref in rel.iget_nodes(VariableRef):
                stinfo = vref.variable.stinfo
                if not stinfo['constnode'] and stinfo['selected']:
                    need_distinct = True
                    # XXX could mark as not invariant
                    break
    # second pass: decide for each variable whether it is invariant and which
    # relation is its principal
    for name, var in rqlst.defined_vars.items():
        stinfo = var.stinfo
        if stinfo.get('ftirels'):
            has_text_query = True
        if stinfo['attrvar']:
            stinfo['invariant'] = False
            stinfo['principal'] = _select_main_var(stinfo['rhsrelations'])
            continue
        if not stinfo['relations'] and not stinfo['typerels']:
            # Any X, Any MAX(X)...
            # those particular queries should be executed using the system
            # entities table unless there is some type restriction
            stinfo['invariant'] = True
            stinfo['principal'] = None
            continue
        # some 'eid' relation with an operator other than '=', and no final
        # or inlined relation (other than 'eid') among the relations which
        # are not in 'rhsrelations'
        if any(rel for rel in stinfo['relations'] if rel.r_type == 'eid' and rel.operator() != '=') and \
               not any(r for r in var.stinfo['relations'] - var.stinfo['rhsrelations']
                       if r.r_type != 'eid' and (getrschema(r.r_type).inlined or getrschema(r.r_type).final)):
            # Any X WHERE X eid > 2
            # those particular queries should be executed using the system entities table
            stinfo['invariant'] = True
            stinfo['principal'] = None
            continue
        if stinfo['selected'] and var.valuable_references() == 1+bool(stinfo['constnode']):
            # "Any X", "Any X, Y WHERE X attr Y"
            stinfo['invariant'] = False
            continue
        # candidate relations for the principal; any `break` in the loop
        # below leaves `invariant` to False since the `else` clause is skipped
        joins = set()
        invariant = False
        for ref in var.references():
            rel = ref.relation()
            if rel is None or rel.is_types_restriction():
                continue
            lhs, rhs = rel.get_parts()
            onlhs = ref is lhs
            if rel.r_type == 'eid':
                if not (onlhs and len(stinfo['relations']) > 1):
                    break
                if not stinfo['constnode']:
                    joins.add(rel)
                continue
            elif rel.r_type == 'identity':
                # identity can't be used as principal, so check that other
                # relations are used
                # XXX explain rhs.operator == '='
                if rhs.operator != '=' or len(stinfo['relations']) <= 1: #(stinfo['constnode'] and rhs.operator == '='):
                    break
                joins.add(rel)
                continue
            rschema = getrschema(rel.r_type)
            if rel.optional:
                if rel in stinfo['optrelations']:
                    # optional variable can't be invariant if this is the lhs
                    # variable of an inlined relation
                    if not rel in stinfo['rhsrelations'] and rschema.inlined:
                        break
                else:
                    # variable used as main variable of an optional relation
                    # can't be invariant
                    break
            if rschema.final or (onlhs and rschema.inlined):
                if rschema.type != 'has_text':
                    # need join anyway if the variable appears in a final or
                    # inlined relation
                    break
                joins.add(rel)
                continue
            if not stinfo['constnode']:
                if rschema.inlined and rel.neged(strict=True):
                    # if relation is inlined, can't be invariant if that
                    # variable is used anywhere else.
                    # see 'Any P WHERE NOT N ecrit_par P, N eid 512':
                    # sql for 'NOT N ecrit_par P' is 'N.ecrit_par is NULL' so P
                    # can use N.ecrit_par as principal
                    if (stinfo['selected'] or len(stinfo['relations']) > 1):
                        break
                elif rschema.symetric and stinfo['selected']:
                    break
            joins.add(rel)
        else:
            # only reached when no reference broke out of the loop above.
            # if there is at least one ambiguous relation and no other to
            # restrict types, can't be invariant since we need to filter out
            # other types
            if not annotator.is_ambiguous(var):
                invariant = True
        stinfo['invariant'] = invariant
        if invariant and joins:
            # remember rqlst/solutions analyze information
            # we have to select a kind of "main" relation which will
            # "extrajoins" the other
            # priority should be given to relation which are not in inner queries
            # (eg exists)
            try:
                stinfo['principal'] = _select_principal(var.sqlscope, joins)
            except CantSelectPrincipal:
                # no usable principal: fall back to a non invariant variable
                stinfo['invariant'] = False
    rqlst.need_distinct = need_distinct
    return has_text_query
class CantSelectPrincipal(Exception):
    """Raised by `_select_principal` when no relation can be used as the
    principal of an invariant variable.
    """
def _select_principal(sqlscope, relations):
"""given a list of rqlst relations, select one which will be used to
represent an invariant variable (e.g. using on extremity of the relation
instead of the variable's type table
"""
diffscope_rels = {}
has_same_scope_rel = False
ored_rels = set()
diffscope_rels = set()
for rel in relations:
# note: only eid and has_text among all final relations may be there
if rel.r_type in ('eid', 'identity'):
has_same_scope_rel = rel.sqlscope is sqlscope
continue
if rel.ored(traverse_scope=True):
ored_rels.add(rel)
elif rel.sqlscope is sqlscope:
return rel
elif not rel.neged(traverse_scope=True):
diffscope_rels.add(rel)
if len(ored_rels) > 1:
ored_rels_copy = tuple(ored_rels)
for rel1 in ored_rels_copy:
for rel2 in ored_rels_copy:
if rel1 is rel2:
continue
if isinstance(common_parent(rel1, rel2), Or):
ored_rels.discard(rel1)
ored_rels.discard(rel2)
for rel in ored_rels:
if rel.sqlscope is sqlscope:
return rel
diffscope_rels.add(rel)
# if DISTINCT query, can use variable from a different scope as principal
# since introduced duplicates will be removed
if sqlscope.stmt.distinct and diffscope_rels:
return iter(diffscope_rels).next()
# XXX could use a relation for a different scope if it can't generate
# duplicates, so we would have to check cardinality
raise CantSelectPrincipal()
def _select_main_var(relations):
"""given a list of rqlst relations, select one which will be used as main
relation for the rhs variable
"""
for rel in relations:
if rel.sqlscope is rel.stmt:
return rel
principal = rel
return principal
def set_qdata(union, noinvariant):
    """Recursively set the `_q_invariant` querier flag on every variable
    defined in the syntax tree, subqueries included.

    A variable is flagged True when its `stinfo['invariant']` is true,
    unless it belongs to `noinvariant` and its principal relation is not
    'has_text'. Every other variable is flagged False.

    :param union: union node whose `children` are the selects to process
    :param noinvariant: container of variables which should not be
      considered as invariant
    """
    for select in union.children:
        # process subqueries before the variables of the select itself
        for subquery in select.with_:
            set_qdata(subquery.query, noinvariant)
        for var in select.defined_vars.itervalues():
            info = var.stinfo
            if not info['invariant']:
                var._q_invariant = False
            elif var in noinvariant and not info['principal'].r_type == 'has_text':
                var._q_invariant = False
            else:
                var._q_invariant = True
class SQLGenAnnotator(object):
    """Annotate rql syntax trees with the information needed by sql
    generation.
    """

    def __init__(self, schema):
        """Keep a reference to `schema` and precompute the set of non final
        entity type names.
        """
        self.schema = schema
        non_final_types = [eschema.type for eschema in schema.entities()
                           if not eschema.is_final()]
        self.nfdomain = frozenset(non_final_types)

    def annotate(self, rqlst):
        """Add information to the rql syntax tree to help sources to do
        their job (read sql generation).

        A variable is tagged as invariant if:
        * it's a non final variable
        * it's not used as lhs in any final or inlined relation
        * there is no type restriction on this variable (either explicit in
          the syntax tree or because a solution for this variable has been
          removed due to security filtering)

        The `has_text_query` attribute of `rqlst` is set as a side effect.
        """
        assert rqlst.TYPE == 'select', rqlst
        rqlst.has_text_query = self._annotate_union(rqlst)

    def _annotate_union(self, union):
        """Annotate each select of `union` and return True if at least one
        of them is reported as a text query by `_annotate_select`.
        """
        found = False
        # every select has to be annotated, so don't stop at the first hit
        for select in union.children:
            if _annotate_select(self, select):
                found = True
        return found

    def is_ambiguous(self, var):
        """Return True if `var` is considered as ambiguous.

        The analysis is done once per statement and cached on it as
        `_deamb_data`.
        """
        # ignore has_text relation: a variable with exactly one has_text
        # relation in its own sql scope is never reported as ambiguous
        text_rels = [rel for rel in var.stinfo['relations']
                     if rel.sqlscope is var.sqlscope and rel.r_type == 'has_text']
        if len(text_rels) == 1:
            return False
        try:
            deamb_data = var.stmt._deamb_data
        except AttributeError:
            deamb_data = IsAmbData(self.schema, self.nfdomain)
            var.stmt._deamb_data = deamb_data
            deamb_data.compute(var.stmt)
        return deamb_data.is_ambiguous(var)
class IsAmbData(object):
    """Per statement analysis telling which variables have ambiguous types.

    `compute` has to be called once with the statement, then `is_ambiguous`
    may be used for each of its variables.
    """

    def __init__(self, schema, nfdomain):
        """
        :param schema: schema object providing the `rschema` and `eschema`
          accessors
        :param nfdomain: set of non final entity types
        """
        self.schema = schema
        # shortcuts
        self.rschema = schema.rschema
        self.eschema = schema.eschema
        # domain for non final variables
        self.nfdomain = nfdomain
        # {var: possible solutions set}
        self.varsols = {}
        # set of ambiguous variables
        self.ambiguousvars = set()
        # remember if a variable has been deambiguified by another to avoid
        # doing the opposite
        self.deambification_map = {}
        # not invariant variables (access to final.inlined relation)
        self.not_invariants = set()
        # {var: relations which may be used to deambiguify it}; created here
        # so that the attribute exists even when `compute` returns early
        self.maydeambrels = {}

    def is_ambiguous(self, var):
        """Return True if `var` is currently recorded as ambiguous."""
        return var in self.ambiguousvars

    def restrict(self, var, restricted_domain):
        """Intersect the possible solutions of `var` with
        `restricted_domain`, and stop considering `var` as ambiguous once its
        solutions are equal to its `stinfo['possibletypes']`.
        """
        self.varsols[var] &= restricted_domain
        if var in self.ambiguousvars and self.varsols[var] == var.stinfo['possibletypes']:
            self.ambiguousvars.remove(var)

    def compute(self, rqlst):
        """Compute the set of ambiguous variables of the `rqlst` statement."""
        # set domains for each variable
        for varname, var in rqlst.defined_vars.items():
            if var.stinfo['uidrels'] or \
                   self.eschema(rqlst.solutions[0][varname]).is_final():
                ptypes = var.stinfo['possibletypes']
            else:
                # fresh set, since `restrict` modifies it in place
                ptypes = set(self.nfdomain)
                self.ambiguousvars.add(var)
            self.varsols[var] = ptypes
        if not self.ambiguousvars:
            return
        # apply relation restriction
        self.maydeambrels = maydeambrels = {}
        for rel in rqlst.iget_nodes(Relation):
            if rel.is_types_restriction() or rel.r_type == 'eid':
                continue
            lhs, rhs = rel.get_variable_parts()
            if isinstance(lhs, VariableRef) or isinstance(rhs, VariableRef):
                rschema = self.rschema(rel.r_type)
                if rschema.inlined or rschema.is_final():
                    self.not_invariants.add(lhs.variable)
                self.set_rel_constraint(lhs, rel, rschema.subjects)
                self.set_rel_constraint(rhs, rel, rschema.objects)
        # try to deambiguify more variables by considering other variables'
        # type, until nothing changes anymore
        modified = True
        while modified and self.ambiguousvars:
            modified = False
            # iterate on a copy since `restrict` may remove variables
            for var in self.ambiguousvars.copy():
                # NOTE(review): this handler also absorbs a KeyError raised
                # from within `deambiguifying_relation` (e.g. by a schema
                # lookup); kept as is since this may be relied upon
                try:
                    for rel in (var.stinfo['relations'] & maydeambrels[var]):
                        if self.deambiguifying_relation(var, rel):
                            modified = True
                            break
                except KeyError:
                    # no relation to deambiguify
                    continue

    def _debug_print(self):
        """Print the current solutions and ambiguous variables."""
        # single argument form, so output is the same with a print statement
        # or a print function
        print('varsols %s' % (dict((x, sorted(str(v) for v in values))
                                   for x, values in self.varsols.items()),))
        print('ambiguous vars %s' % (sorted(self.ambiguousvars),))

    def set_rel_constraint(self, term, rel, etypes_func):
        """If `term` is a reference to an ambiguous variable, record `rel`
        as a relation which may deambiguify it and, when `rel` is its only
        non type relation or is in the variable's sql scope, restrict the
        variable's solutions to the types returned by `etypes_func()`.
        """
        if isinstance(term, VariableRef) and self.is_ambiguous(term.variable):
            var = term.variable
            if len(var.stinfo['relations'] - var.stinfo['typerels']) == 1 \
                   or rel.sqlscope is var.sqlscope:
                self.restrict(var, frozenset(etypes_func()))
            self.maydeambrels.setdefault(var, set()).add(rel)

    def deambiguifying_relation(self, var, rel):
        """Try to use `rel` to deambiguify `var`, according to the types of
        the term found on the other side of the relation.

        :return: True if `var` solutions have been restricted, else False
        """
        lhs, rhs = rel.get_variable_parts()
        onlhs = var is getattr(lhs, 'variable', None)
        # explicit branch: `onlhs and rhs or lhs` would select lhs whenever
        # rhs evaluates to false
        if onlhs:
            other = rhs
        else:
            other = lhs
        otheretypes = None
        # XXX isinstance(other.variable, Variable) to skip column alias
        if isinstance(other, VariableRef) and isinstance(other.variable, Variable):
            deambiguifier = other.variable
            if var is not self.deambification_map.get(deambiguifier):
                if not var.stinfo['typerels']:
                    otheretypes = deambiguifier.stinfo['possibletypes']
                elif not self.is_ambiguous(deambiguifier):
                    otheretypes = self.varsols[deambiguifier]
                elif deambiguifier in self.not_invariants:
                    # we know variable won't be invariant, try to use
                    # it to deambiguify the current variable
                    otheretypes = self.varsols[deambiguifier]
        elif isinstance(other, Constant) and other.uidtype:
            otheretypes = (other.uidtype,)
            deambiguifier = None
        if otheretypes is not None:
            # unless types for variable are already non-ambiguous, check
            # if this relation has some type ambiguity
            rschema = self.rschema(rel.r_type)
            if onlhs:
                rtypefunc = rschema.subjects
            else:
                rtypefunc = rschema.objects
            # only the first type is actually examined: the loop body either
            # breaks or returns
            for otheretype in otheretypes:
                reltypes = frozenset(rtypefunc(otheretype))
                if var.stinfo['possibletypes'] != reltypes:
                    break
                self.restrict(var, reltypes)
                self.deambification_map[var] = deambiguifier
                return True
        return False