|
1 # copyright 2003-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
|
2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
|
3 # |
|
4 # This file is part of CubicWeb. |
|
5 # |
|
6 # CubicWeb is free software: you can redistribute it and/or modify it under the |
|
7 # terms of the GNU Lesser General Public License as published by the Free |
|
8 # Software Foundation, either version 2.1 of the License, or (at your option) |
|
9 # any later version. |
|
10 # |
|
11 # CubicWeb is distributed in the hope that it will be useful, but WITHOUT |
|
12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
13 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
|
14 # details. |
|
15 # |
|
16 # You should have received a copy of the GNU Lesser General Public License along |
|
17 # with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
|
18 """Functions to add additional annotations on a rql syntax tree to ease later |
|
19 code generation. |
|
20 """ |
|
21 from __future__ import print_function |
|
22 |
|
23 __docformat__ = "restructuredtext en" |
|
24 |
|
25 from rql import BadRQLQuery |
|
26 from rql.nodes import Relation, VariableRef, Constant, Variable, Or, Exists |
|
27 from rql.utils import common_parent |
|
28 |
|
def _annotate_select(annotator, rqlst):
    """Annotate the variables of a single select node.

    Sub-queries found in ``rqlst.with_`` are annotated first (through
    ``annotator._annotate_union``).  Then, for each variable of
    ``rqlst.defined_vars``, ``stinfo['invariant']`` is set and, in some
    branches, ``stinfo['principal']`` as well.  When the selected principal
    relation is inlined, the ``'scope'`` entry of the stinfo of that
    relation's first child variable is updated too.

    :param annotator: object providing ``schema.rschema``,
      ``_annotate_union`` and ``is_ambiguous``
    :param rqlst: the select node to annotate (modified in place)
    :return: True if a sub-query reported a text query, or if a variable or a
      column alias of `rqlst` has a truthy ``'ftirels'`` stinfo entry
    """
    has_text_query = False
    # annotate every sub-query, remembering if one of them is a text query
    for subquery in rqlst.with_:
        if annotator._annotate_union(subquery.query):
            has_text_query = True
    #if server.DEBUG:
    #    print '-------- sql annotate', repr(rqlst)
    getrschema = annotator.schema.rschema
    for var in rqlst.defined_vars.values():
        stinfo = var.stinfo
        if stinfo.get('ftirels'):
            has_text_query = True
        if stinfo['attrvar']:
            # the 'attrvar' entry is set: never invariant, the principal is
            # picked among the relations where the variable is on the rhs
            stinfo['invariant'] = False
            stinfo['principal'] = _select_main_var(stinfo['rhsrelations'])
            continue
        if not stinfo['relations'] and stinfo['typerel'] is None:
            # Any X, Any MAX(X)...
            # those particular queries should be executed using the system
            # entities table unless there is some type restriction
            stinfo['invariant'] = True
            stinfo['principal'] = None
            continue
        if any(rel for rel in stinfo['relations'] if rel.r_type == 'eid' and rel.operator() != '=') and \
               not any(r for r in var.stinfo['relations'] - var.stinfo['rhsrelations']
                       if r.r_type != 'eid' and (getrschema(r.r_type).inlined or getrschema(r.r_type).final)):
            # Any X WHERE X eid > 2
            # those particular queries should be executed using the system entities table
            stinfo['invariant'] = True
            stinfo['principal'] = None
            continue
        if stinfo['selected'] and var.valuable_references() == 1+bool(stinfo['constnode']):
            # "Any X", "Any X, Y WHERE X attr Y"
            stinfo['invariant'] = False
            continue
        # (relation, role) couples which are candidates to become the principal
        joins = set()
        invariant = False
        # any `break` below means the variable can't be invariant: the `else`
        # clause of this loop is then skipped and `invariant` stays False
        for ref in var.references():
            rel = ref.relation()
            if rel is None or rel.is_types_restriction():
                continue
            lhs, rhs = rel.get_parts()
            onlhs = ref is lhs
            role = 'subject' if onlhs else 'object'
            if rel.r_type == 'eid':
                if not (onlhs and len(stinfo['relations']) > 1):
                    break
                if not stinfo['constnode']:
                    joins.add( (rel, role) )
                continue
            elif rel.r_type == 'identity':
                # identity can't be used as principal, so check other relation are used
                # XXX explain rhs.operator == '='
                if rhs.operator != '=' or len(stinfo['relations']) <= 1: #(stinfo['constnode'] and rhs.operator == '='):
                    break
                joins.add( (rel, role) )
                continue
            rschema = getrschema(rel.r_type)
            if rel.optional:
                if rel in stinfo.get('optrelations', ()):
                    # optional variable can't be invariant if this is the lhs
                    # variable of an inlined relation
                    if not rel in stinfo['rhsrelations'] and rschema.inlined:
                        break
                # variable used as main variable of an optional relation can't
                # be invariant, unless we can use some other relation as
                # reference for the outer join
                elif not stinfo['constnode']:
                    break
                elif len(stinfo['relations']) == 2:
                    # look at the variable on the other end of the relation
                    if onlhs:
                        ostinfo = rhs.children[0].variable.stinfo
                    else:
                        ostinfo = lhs.variable.stinfo
                    if not (ostinfo.get('optcomparisons') or
                            any(orel for orel in ostinfo['relations']
                                if orel.optional and orel is not rel)):
                        break
            if rschema.final or (onlhs and rschema.inlined):
                if rschema.type != 'has_text':
                    # need join anyway if the variable appears in a final or
                    # inlined relation
                    break
                joins.add( (rel, role) )
                continue
            if not stinfo['constnode']:
                if rschema.inlined and rel.neged(strict=True):
                    # if relation is inlined, can't be invariant if that
                    # variable is used anywhere else.
                    # see 'Any P WHERE NOT N ecrit_par P, N eid 512':
                    # sql for 'NOT N ecrit_par P' is 'N.ecrit_par is NULL' so P
                    # can use N.ecrit_par as principal
                    if (stinfo['selected'] or len(stinfo['relations']) > 1):
                        break
            joins.add( (rel, role) )
        else:
            # if there is at least one ambiguous relation and no other to
            # restrict types, can't be invariant since we need to filter out
            # other types
            if not annotator.is_ambiguous(var):
                invariant = True
        stinfo['invariant'] = invariant
        if invariant and joins:
            # remember rqlst/solutions analyze information
            # we have to select a kindof "main" relation which will "extrajoins"
            # the other
            # priority should be given to relation which are not in inner queries
            # (eg exists)
            try:
                stinfo['principal'] = principal = _select_principal(var.scope, joins)
                if getrschema(principal.r_type).inlined:
                    # the scope of the lhs variable must be equal or outer to the
                    # rhs variable's scope (since it's retrieved from lhs's table)
                    sstinfo = principal.children[0].variable.stinfo
                    sstinfo['scope'] = common_parent(sstinfo['scope'], stinfo['scope']).scope
            except CantSelectPrincipal:
                # no usable principal relation: fall back to a non invariant
                # variable
                stinfo['invariant'] = False
    # see unittest_rqlannotation. test_has_text_security_cache_bug
    # XXX probably more to do, but yet that work without more...
    for col_alias in rqlst.aliases.values():
        if col_alias.stinfo.get('ftirels'):
            has_text_query = True
    return has_text_query
|
152 |
|
153 |
|
154 |
|
class CantSelectPrincipal(Exception):
    """Signal that no relation is eligible to be the 'principal' of a
    variable.
    """
|
157 |
|
def _select_principal(scope, relations, _sort=lambda x:x):
    """Select the relation which will be used to represent an invariant
    variable (i.e. using one extremity of the relation instead of the
    variable's type table).

    :param scope: the scope of the variable; a relation belonging to this
      very scope is preferred
    :param relations: collection of ``(relation, role)`` couples, where role
      is 'subject' or 'object'
    :param _sort: callable applied to the collections before iterating on
      them; it is only there to get a predictable order in tests
    :return: the selected relation
    :raise CantSelectPrincipal: if no relation is usable as principal
    """
    ored_rels = set()
    # relations from another scope, only usable for DISTINCT queries
    diffscope_rels = set()
    several_relations = len(relations) > 1
    for rel, role in _sort(relations):
        # note: only eid and has_text among all final relations may be there
        if rel.r_type in ('eid', 'identity'):
            continue
        if rel.optional is not None and several_relations:
            # skip an optional relation when the variable sits on the side
            # named by `rel.optional`, as long as there are other candidates
            if role == 'subject' and rel.optional == 'right':
                continue
            if role == 'object' and rel.optional == 'left':
                continue
        if rel.ored(traverse_scope=True):
            ored_rels.add(rel)
        elif rel.scope is scope:
            return rel
        elif not rel.neged(traverse_scope=True):
            diffscope_rels.add(rel)
    if len(ored_rels) > 1:
        # drop each couple of relations whose common parent is an `Or` node.
        # Iterate on a snapshot since `ored_rels` is modified in the loop.
        ored_rels_copy = tuple(ored_rels)
        for rel1 in ored_rels_copy:
            for rel2 in ored_rels_copy:
                if rel1 is rel2:
                    continue
                if isinstance(common_parent(rel1, rel2), Or):
                    ored_rels.discard(rel1)
                    ored_rels.discard(rel2)
    for rel in _sort(ored_rels):
        if rel.scope is scope:
            return rel
        diffscope_rels.add(rel)
    # if DISTINCT query, can use variable from a different scope as principal
    # since introduced duplicates will be removed
    if scope.stmt.distinct and diffscope_rels:
        return next(iter(_sort(diffscope_rels)))
    # XXX could use a relation from a different scope if it can't generate
    # duplicates, so we should have to check cardinality
    raise CantSelectPrincipal()
|
202 |
|
def _select_main_var(relations):
    """Pick, among the given rqlst relations, the one which will be the main
    relation for the rhs variable.

    A candidate must be a non negated relation using the '=' or 'IS' operator
    with a variable reference as rhs.  A non optional candidate whose scope is
    its statement is returned right away; otherwise the last non optional
    candidate wins, then the first optional one.

    :raise BadRQLQuery: if there is no candidate at all
    """
    def sort_key(relation):
        # deterministic order, for test predictability
        return (relation.children[0].name, relation.r_type)

    fallback = None
    optional_candidates = []
    for candidate in sorted(relations, key=sort_key):
        # only equality relation with a variable as rhs may be principal
        if candidate.operator() not in ('=', 'IS'):
            continue
        if not isinstance(candidate.children[1].children[0], VariableRef):
            continue
        if candidate.neged(strict=True):
            continue
        if candidate.optional:
            optional_candidates.append(candidate)
        elif candidate.scope is candidate.stmt:
            return candidate
        else:
            fallback = candidate
    if fallback is not None:
        return fallback
    if optional_candidates:
        return optional_candidates[0]
    described = ', '.join(r.as_string() for r in relations)
    raise BadRQLQuery('unable to find principal in %s' % described)
|
227 |
|
228 |
|
def set_qdata(getrschema, union, noinvariant):
    """Recursive function to set querier data on variables in the syntax tree.

    Each variable defined by the selects of `union`, and by their
    sub-queries, gets a boolean ``_q_invariant`` attribute.  It is True when
    the variable is annotated as invariant (``stinfo['invariant']``), unless
    the variable belongs to `noinvariant` and its principal relation isn't
    'has_text'.

    :param getrschema: not used here, only handed over to recursive calls
    :param union: union node whose ``children`` are select nodes
    :param noinvariant: container of variables which must not be considered
      as invariant
    """
    for select in union.children:
        for subquery in select.with_:
            set_qdata(getrschema, subquery.query, noinvariant)
        for var in select.defined_vars.values():
            stinfo = var.stinfo
            if not stinfo['invariant']:
                var._q_invariant = False
            elif var in noinvariant:
                # an invariant variable may have no principal relation (None
                # or missing stinfo entry): it can't be a 'has_text' one, so
                # the variable simply loses its invariant status instead of
                # triggering an AttributeError / KeyError
                principal = stinfo.get('principal')
                var._q_invariant = getattr(principal, 'r_type', None) == 'has_text'
            else:
                var._q_invariant = True
|
243 |
|
244 |
|
class SQLGenAnnotator(object):
    """Annotate rql syntax trees with information used for sql generation."""

    def __init__(self, schema):
        self.schema = schema
        # types of every non final entity of the schema
        non_final_types = [eschema.type for eschema in schema.entities()
                           if not eschema.final]
        self.nfdomain = frozenset(non_final_types)

    def annotate(self, rqlst):
        """add information to the rql syntax tree to help sources to do their
        job (read sql generation)

        a variable is tagged as invariant if:
        * it's a non final variable
        * it's not used as lhs in any final or inlined relation
        * there is no type restriction on this variable (either explicit in the
          syntax tree or because a solution for this variable has been removed
          due to security filtering)

        The outcome of the annotation is stored on `rqlst` as its
        `has_text_query` attribute.
        """
        has_text = self._annotate_union(rqlst)
        rqlst.has_text_query = has_text

    def _annotate_union(self, union):
        """annotate each select of the union, return True if at least one of
        them is a text query
        """
        # use a list and not a generator: every select has to be annotated,
        # even once a text query has been found
        flags = [_annotate_select(self, select) for select in union.children]
        return any(flags)

    def is_ambiguous(self, var):
        """return True if the variable is ambiguous according to the
        analysis data cached on its statement (computed on first access)
        """
        # ignore has_text relation when we know it will be used as principal.
        # This is expected by the rql2sql generator which will use the `entities`
        # table to filter out by type if necessary, This optimisation is very
        # interesting in multi-sources cases, as it may avoid a costly query
        # on sources to get all entities of a given type to achieve this, while
        # we have all the necessary information.
        root = var.stmt.root # Union node
        # rel.scope -> Select or Exists node, so add .parent to get Union from
        # Select node
        toplevel_rtypes = [rel.r_type for rel in var.stinfo['relations']
                           if rel.scope.parent is root]
        if toplevel_rtypes == ['has_text']:
            return False
        stmt = var.stmt
        try:
            analysis = stmt._deamb_data
        except AttributeError:
            # first variable checked for this statement: run the analysis
            # and cache it on the statement
            analysis = IsAmbData(self.schema, self.nfdomain)
            stmt._deamb_data = analysis
            analysis.compute(stmt)
        return analysis.is_ambiguous(var)
|
291 |
|
292 |
|
class IsAmbData(object):
    """Per statement analysis telling which variables are ambiguous.

    `compute` gives a domain of possible types to each variable of a
    statement, then narrows the domains using the relations of the
    statement.  `is_ambiguous` tells whether a variable is still in the set
    of ambiguous variables.
    """

    def __init__(self, schema, nfdomain):
        """
        :param schema: the schema, providing `rschema` and `eschema` accessors
        :param nfdomain: domain used for non final variables
        """
        self.schema = schema
        # shortcuts
        self.rschema = schema.rschema
        self.eschema = schema.eschema
        # domain for non final variables
        self.nfdomain = nfdomain
        # {var: possible solutions set}
        self.varsols = {}
        # set of ambiguous variables
        self.ambiguousvars = set()
        # remember if a variable has been deambiguified by another to avoid
        # doing the opposite
        self.deambification_map = {}
        # not invariant variables (access to final.inlined relation)
        self.not_invariants = set()
        # NOTE: `maydeambrels` is not initialized here, it is only set by
        # `compute` when there is at least one ambiguous variable

    def is_ambiguous(self, var):
        """return True if `var` is in the set of ambiguous variables"""
        return var in self.ambiguousvars

    def restrict(self, var, restricted_domain):
        """intersect the domain of `var` with `restricted_domain`

        The variable is removed from the ambiguous variables once its domain
        is equal to its `possibletypes` stinfo entry.
        """
        # in-place intersection of the domain recorded for the variable
        self.varsols[var] &= restricted_domain
        if var in self.ambiguousvars and self.varsols[var] == var.stinfo['possibletypes']:
            self.ambiguousvars.remove(var)

    def compute(self, rqlst):
        """compute the domain of each variable defined by `rqlst` and find
        out which ones are ambiguous
        """
        # set domains for each variable
        for varname, var in rqlst.defined_vars.items():
            # variables with a uid relation, or whose type in the first
            # solution is final, directly get their possible types as domain
            # and aren't considered as ambiguous
            if var.stinfo['uidrel'] is not None or \
                   self.eschema(rqlst.solutions[0][varname]).final:
                ptypes = var.stinfo['possibletypes']
            else:
                # start from a copy of the whole non final domain
                ptypes = set(self.nfdomain)
                self.ambiguousvars.add(var)
            self.varsols[var] = ptypes
        if not self.ambiguousvars:
            return
        # apply relation restriction
        # {var: set of relations which may be used to deambiguify it}
        self.maydeambrels = maydeambrels = {}
        for rel in rqlst.iget_nodes(Relation):
            if rel.r_type == 'eid' or rel.is_types_restriction():
                continue
            lhs, rhs = rel.get_variable_parts()
            if isinstance(lhs, VariableRef) or isinstance(rhs, VariableRef):
                rschema = self.rschema(rel.r_type)
                if rschema.inlined or rschema.final:
                    # NOTE(review): assumes lhs is a variable reference
                    # whenever this is reached -- TODO confirm
                    self.not_invariants.add(lhs.variable)
                self.set_rel_constraint(lhs, rel, rschema.subjects)
                self.set_rel_constraint(rhs, rel, rschema.objects)
        # try to deambiguify more variables by considering other variables'type
        # loop until a whole pass doesn't deambiguify anything anymore
        modified = True
        while modified and self.ambiguousvars:
            modified = False
            # iterate on a copy since `ambiguousvars` may be modified through
            # `deambiguifying_relation`
            for var in self.ambiguousvars.copy():
                try:
                    for rel in (var.stinfo['relations'] & maydeambrels[var]):
                        if self.deambiguifying_relation(var, rel):
                            modified = True
                            break
                except KeyError:
                    # no relation to deambiguify
                    continue

    def _debug_print(self):
        """print the computed domains and the ambiguous variables"""
        print('varsols', dict((x, sorted(str(v) for v in values))
                               for x, values in self.varsols.items()))
        print('ambiguous vars', sorted(self.ambiguousvars))

    def set_rel_constraint(self, term, rel, etypes_func):
        """restrict the domain of the variable referenced by `term` using the
        types returned by `etypes_func`, and record `rel` as a relation which
        may deambiguify this variable

        Nothing is done unless `term` references an ambiguous variable, and
        either `rel` is the only relation of the variable, or it is in the
        variable's scope, or it is an 'identity' relation.
        """
        if isinstance(term, VariableRef) and self.is_ambiguous(term.variable):
            var = term.variable
            if len(var.stinfo['relations']) == 1 \
                   or rel.scope is var.scope or rel.r_type == 'identity':
                self.restrict(var, frozenset(etypes_func()))
                try:
                    self.maydeambrels[var].add(rel)
                except KeyError:
                    # first relation recorded for this variable
                    self.maydeambrels[var] = set((rel,))

    def deambiguifying_relation(self, var, rel):
        """try to use the other end of `rel` to deambiguify `var`

        Return True if the domain of `var` has been restricted to its
        possible types, else False.
        """
        lhs, rhs = rel.get_variable_parts()
        onlhs = var is getattr(lhs, 'variable', None)
        # pick the other end of the relation
        # NOTE(review): with this and/or idiom, `lhs` is picked when `onlhs`
        # is true but `rhs` is falsy -- confirm rhs nodes are always truthy
        other = onlhs and rhs or lhs
        otheretypes = None
        # XXX isinstance(other.variable, Variable) to skip column alias
        if isinstance(other, VariableRef) and isinstance(other.variable, Variable):
            deambiguifier = other.variable
            # skip if `deambiguifier` has itself been deambiguified by `var`
            if not var is self.deambification_map.get(deambiguifier):
                if var.stinfo['typerel'] is None:
                    otheretypes = deambiguifier.stinfo['possibletypes']
                elif not self.is_ambiguous(deambiguifier):
                    otheretypes = self.varsols[deambiguifier]
                elif deambiguifier in self.not_invariants:
                    # we know variable won't be invariant, try to use
                    # it to deambguify the current variable
                    otheretypes = self.varsols[deambiguifier]
            if deambiguifier.stinfo['typerel'] is None:
                # if deambiguifier has no type restriction using 'is',
                # don't record it
                deambiguifier = None
        elif isinstance(other, Constant) and other.uidtype:
            # constant with a truthy `uidtype`: use it as the only type of
            # the other end
            otheretypes = (other.uidtype,)
            deambiguifier = None
        if otheretypes is not None:
            # to restrict, we must check that for all type in othertypes,
            # possible types on the other end of the relation are matching
            # variable's possible types
            rschema = self.rschema(rel.r_type)
            if onlhs:
                rtypefunc = rschema.subjects
            else:
                rtypefunc = rschema.objects
            for otheretype in otheretypes:
                reltypes = frozenset(rtypefunc(otheretype))
                if var.stinfo['possibletypes'] != reltypes:
                    return False
            self.restrict(var, var.stinfo['possibletypes'])
            self.deambification_map[var] = deambiguifier
            return True
        return False
|
413 return False |