diff -r 000000000000 -r b97547f5f1fa server/sources/rql2sql.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/server/sources/rql2sql.py Wed Nov 05 15:52:50 2008 +0100 @@ -0,0 +1,1202 @@ +"""RQL to SQL generator for native sources. + + +SQL queries optimization +~~~~~~~~~~~~~~~~~~~~~~~~ +1. EUser X WHERE X in_group G, G name 'users': + + EUser is the only subject entity type for the in_group relation, + which allow us to do :: + + SELECT eid_from FROM in_group, EGroup + WHERE in_group.eid_to = EGroup.eid_from + AND EGroup.name = 'users' + + +2. Any X WHERE X nonfinal1 Y, Y nonfinal2 Z + + -> direct join between nonfinal1 and nonfinal2, whatever X,Y, Z (unless + inlined...) + + NOT IMPLEMENTED (and quite hard to implement) + +Potential optimization information is collected by the querier, sql generation +is done according to this information + + +:organization: Logilab +:copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved. +:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr +""" +__docformat__ = "restructuredtext en" + +import threading + +from rql import BadRQLQuery, CoercionError +from rql.stmts import Union, Select +from rql.nodes import (SortTerm, VariableRef, Constant, Function, Not, + Variable, ColumnAlias, Relation, SubQuery, Exists) + +from cubicweb import server +from cubicweb.server.utils import cleanup_solutions + +def _new_var(select, varname): + newvar = select.get_variable(varname) + if not 'relations' in newvar.stinfo: + # not yet initialized + newvar.prepare_annotation() + newvar.stinfo['scope'] = select + newvar._q_invariant = False + return newvar + +def _fill_to_wrap_rel(var, newselect, towrap, schema): + for rel in var.stinfo['relations'] - var.stinfo['rhsrelations']: + rschema = schema.rschema(rel.r_type) + if rschema.inlined: + towrap.add( (var, rel) ) + for vref in rel.children[1].iget_nodes(VariableRef): + newivar = _new_var(newselect, vref.name) + newselect.selection.append(VariableRef(newivar)) + _fill_to_wrap_rel(vref.variable, newselect, towrap, schema) + elif rschema.is_final(): + towrap.add( (var, rel) ) + +def rewrite_unstable_outer_join(select, solutions, unstable, schema): + """if some optional variables are unstable, they should be selected in a + subquery. This function check this and rewrite the rql syntax tree if + necessary (in place). Return a boolean telling if the tree has been modified + """ + torewrite = set() + modified = False + for varname in tuple(unstable): + var = select.defined_vars[varname] + if not var.stinfo['optrelations']: + continue + modified = True + unstable.remove(varname) + torewrite.add(var) + newselect = Select() + newselect.need_distinct = False + myunion = Union() + myunion.append(newselect) + # extract aliases / selection + newvar = _new_var(newselect, var.name) + newselect.selection = [VariableRef(newvar)] + for avar in select.defined_vars.itervalues(): + if avar.stinfo['attrvar'] is var: + newavar = _new_var(newselect, avar.name) + newavar.stinfo['attrvar'] = newvar + newselect.selection.append(VariableRef(newavar)) + towrap_rels = set() + _fill_to_wrap_rel(var, newselect, towrap_rels, schema) + # extract relations + for var, rel in towrap_rels: + newrel = rel.copy(newselect) + newselect.add_restriction(newrel) + select.remove_node(rel) + var.stinfo['relations'].remove(rel) + newvar.stinfo['relations'].add(newrel) + if rel.optional in ('left', 'both'): + newvar.stinfo['optrelations'].add(newrel) + for vref in newrel.children[1].iget_nodes(VariableRef): + var = vref.variable + var.stinfo['relations'].add(newrel) + var.stinfo['rhsrelations'].add(newrel) + if rel.optional in ('right', 'both'): + var.stinfo['optrelations'].add(newrel) + # extract subquery solutions + solutions = [sol.copy() for sol in solutions] + cleanup_solutions(newselect, solutions) + newselect.set_possible_types(solutions) + # full sub-query + aliases = [VariableRef(select.get_variable(avar.name, i)) + for i, avar in enumerate(newselect.selection)] + select.add_subquery(SubQuery(aliases, myunion), check=False) + return modified + +def _new_solutions(rqlst, solutions): + """first filter out subqueries variables from solutions""" + newsolutions = [] + for origsol in solutions: + asol = {} + for vname in rqlst.defined_vars: + asol[vname] = origsol[vname] + if not asol in newsolutions: + newsolutions.append(asol) + return newsolutions + +def remove_unused_solutions(rqlst, solutions, varmap, schema): + """cleanup solutions: remove solutions where invariant variables are taking + different types + """ + newsolutions = _new_solutions(rqlst, solutions) + existssols = {} + unstable = set() + for vname, var in rqlst.defined_vars.iteritems(): + vtype = newsolutions[0][vname] + if var._q_invariant or vname in varmap: + for i in xrange(len(newsolutions)-1, 0, -1): + if vtype != newsolutions[i][vname]: + newsolutions.pop(i) + elif not var.scope is rqlst: + # move appart variables which are in a EXISTS scope and are variating + try: + thisexistssols, thisexistsvars = existssols[var.scope] + except KeyError: + thisexistssols = [newsolutions[0]] + thisexistsvars = set() + existssols[var.scope] = thisexistssols, thisexistsvars + for i in xrange(len(newsolutions)-1, 0, -1): + if vtype != newsolutions[i][vname]: + thisexistssols.append(newsolutions.pop(i)) + thisexistsvars.add(vname) + else: + # remember unstable variables + for i in xrange(1, len(newsolutions)): + if vtype != newsolutions[i][vname]: + unstable.add(vname) + if len(newsolutions) > 1: + if rewrite_unstable_outer_join(rqlst, newsolutions, unstable, schema): + # remove variables extracted to subqueries from solutions + newsolutions = _new_solutions(rqlst, newsolutions) + return newsolutions, existssols, unstable + +def relation_info(relation): + lhs, rhs = relation.get_variable_parts() + try: + lhs = lhs.variable + lhsconst = lhs.stinfo['constnode'] + except AttributeError: + lhsconst = lhs + lhs = None + except KeyError: + lhsconst = None # ColumnAlias + try: + rhs = rhs.variable + rhsconst = rhs.stinfo['constnode'] + except AttributeError: + rhsconst = rhs + rhs = None + except KeyError: + rhsconst = None # ColumnAlias + return lhs, lhsconst, rhs, rhsconst + +def switch_relation_field(sql, table=''): + switchedsql = sql.replace(table + '.eid_from', '__eid_from__') + switchedsql = switchedsql.replace(table + '.eid_to', + table + '.eid_from') + return switchedsql.replace('__eid_from__', table + '.eid_to') + +def sort_term_selection(sorts, selectedidx, rqlst, groups): + # XXX beurk + if isinstance(rqlst, list): + def append(term): + rqlst.append(term) + else: + def append(term): + rqlst.selection.append(term.copy(rqlst)) + for sortterm in sorts: + term = sortterm.term + if not isinstance(term, Constant) and not str(term) in selectedidx: + selectedidx.append(str(term)) + append(term) + if groups: + for vref in term.iget_nodes(VariableRef): + if not vref in groups: + groups.append(vref) + +def fix_selection(rqlst, selectedidx, needwrap, sorts, groups, having): + if sorts: + sort_term_selection(sorts, selectedidx, rqlst, not needwrap and groups) + if needwrap: + if groups: + for vref in groups: + if not vref.name in selectedidx: + selectedidx.append(vref.name) + rqlst.selection.append(vref) + if having: + for term in having: + for vref in term.iget_nodes(VariableRef): + if not vref.name in selectedidx: + selectedidx.append(vref.name) + rqlst.selection.append(vref) + +# IGenerator implementation for RQL->SQL ###################################### + + +class StateInfo(object): + def __init__(self, existssols, unstablevars): + self.existssols = existssols + self.unstablevars = unstablevars + self.subtables = {} + + def reset(self, solution): + """reset some visit variables""" + self.solution = solution + self.count = 0 + self.done = set() + self.tables = self.subtables.copy() + self.actual_tables = [[]] + for _, tsql in self.tables.itervalues(): + self.actual_tables[-1].append(tsql) + self.outer_tables = {} + self.duplicate_switches = [] + self.attr_vars = {} + self.aliases = {} + self.restrictions = [] + self._restr_stack = [] + + def add_restriction(self, restr): + if restr: + self.restrictions.append(restr) + + def iter_exists_sols(self, exists): + if not exists in self.existssols: + yield 1 + return + thisexistssols, thisexistsvars = self.existssols[exists] + origsol = self.solution + origtables = self.tables + done = self.done + for thisexistssol in thisexistssols: + for vname in self.unstablevars: + if thisexistssol[vname] != origsol[vname] and vname in thisexistsvars: + break + else: + self.tables = origtables.copy() + self.solution = thisexistssol + yield 1 + # cleanup self.done from stuff specific to exists + for var in thisexistsvars: + if var in done: + done.remove(var) + for rel in exists.iget_nodes(Relation): + if rel in done: + done.remove(rel) + self.solution = origsol + self.tables = origtables + + def push_scope(self): + self.actual_tables.append([]) + self._restr_stack.append(self.restrictions) + self.restrictions = [] + + def pop_scope(self): + restrictions = self.restrictions + self.restrictions = self._restr_stack.pop() + return restrictions, self.actual_tables.pop() + + +class SQLGenerator(object): + """ + generation of SQL from the fully expanded RQL syntax tree + SQL is designed to be used with a CubicWeb SQL schema + + Groups and sort are not handled here since they should not be handled at + this level (see cubicweb.server.querier) + + we should not have errors here ! + + WARNING: a CubicWebSQLGenerator instance is not thread safe, but generate is + protected by a lock + """ + + def __init__(self, schema, dbms_helper, dbencoding='UTF-8'): + self.schema = schema + self.dbms_helper = dbms_helper + self.dbencoding = dbencoding + self.keyword_map = {'NOW' : self.dbms_helper.sql_current_timestamp, + 'TODAY': self.dbms_helper.sql_current_date, + } + if not self.dbms_helper.union_parentheses_support: + self.union_sql = self.noparen_union_sql + self._lock = threading.Lock() + + def generate(self, union, args=None, varmap=None): + """return SQL queries and a variable dictionnary from a RQL syntax tree + + :partrqls: a list of couple (rqlst, solutions) + :args: optional dictionary with values of substitutions used in the query + :varmap: optional dictionary mapping variable name to a special table + name, in case the query as to fetch data from temporary tables + + return an sql string and a dictionary with substitutions values + """ + if args is None: + args = {} + if varmap is None: + varmap = {} + self._lock.acquire() + self._args = args + self._varmap = varmap + self._query_attrs = {} + self._state = None + try: + # union query for each rqlst / solution + sql = self.union_sql(union) + # we are done + return sql, self._query_attrs + finally: + self._lock.release() + + def union_sql(self, union, needalias=False): # pylint: disable-msg=E0202 + if len(union.children) == 1: + return self.select_sql(union.children[0], needalias) + sqls = ('(%s)' % self.select_sql(select, needalias) + for select in union.children) + return '\nUNION ALL\n'.join(sqls) + + def noparen_union_sql(self, union, needalias=False): + # needed for sqlite backend which doesn't like parentheses around + # union query. This may cause bug in some condition (sort in one of + # the subquery) but will work in most case + # see http://www.sqlite.org/cvstrac/tktview?tn=3074 + sqls = (self.select_sql(select, needalias) + for i, select in enumerate(union.children)) + return '\nUNION ALL\n'.join(sqls) + + def select_sql(self, select, needalias=False): + """return SQL queries and a variable dictionnary from a RQL syntax tree + + :select: a selection statement of the syntax tree (`rql.stmts.Select`) + :solution: a dictionnary containing variables binding. + A solution's dictionnary has variable's names as key and variable's + types as values + :needwrap: boolean telling if the query will be wrapped in an outer + query (to deal with aggregat and/or grouping) + """ + distinct = selectsortterms = select.need_distinct + sorts = select.orderby + groups = select.groupby + having = select.having + # remember selection, it may be changed and have to be restored + origselection = select.selection[:] + # check if the query will have union subquery, if it need sort term + # selection (union or distinct query) and wrapping (union with groups) + needwrap = False + sols = select.solutions + if len(sols) > 1: + # remove invariant from solutions + sols, existssols, unstable = remove_unused_solutions( + select, sols, self._varmap, self.schema) + if len(sols) > 1: + # if there is still more than one solution, a UNION will be + # generated and so sort terms have to be selected + selectsortterms = True + # and if select is using group by or aggregat, a wrapping + # query will be necessary + if groups or select.has_aggregat: + select.select_only_variables() + needwrap = True + else: + existssols, unstable = {}, () + state = StateInfo(existssols, unstable) + # treat subqueries + self._subqueries_sql(select, state) + # generate sql for this select node + selectidx = [str(term) for term in select.selection] + if needwrap: + outerselection = origselection[:] + if sorts and selectsortterms: + outerselectidx = [str(term) for term in outerselection] + if distinct: + sort_term_selection(sorts, outerselectidx, + outerselection, groups) + else: + outerselectidx = selectidx[:] + fix_selection(select, selectidx, needwrap, + selectsortterms and sorts, groups, having) + if needwrap: + fselectidx = outerselectidx + fneedwrap = len(outerselection) != len(origselection) + else: + fselectidx = selectidx + fneedwrap = len(select.selection) != len(origselection) + if fneedwrap: + needalias = True + self._in_wrapping_query = False + self._state = state + try: + sql = self._solutions_sql(select, sols, distinct, needalias or needwrap) + # generate groups / having before wrapping query selection to + # get correct column aliases + self._in_wrapping_query = needwrap + if groups: + # no constant should be inserted in GROUP BY else the backend will + # interpret it as a positional index in the selection + groups = ','.join(vref.accept(self) for vref in groups + if not isinstance(vref, Constant)) + if having: + # filter out constants as for GROUP BY + having = ','.join(vref.accept(self) for vref in having + if not isinstance(vref, Constant)) + if needwrap: + sql = '%s FROM (%s) AS T1' % (self._selection_sql(outerselection, distinct, + needalias), + sql) + if groups: + sql += '\nGROUP BY %s' % groups + if having: + sql += '\nHAVING %s' % having + # sort + if sorts: + sql += '\nORDER BY %s' % ','.join(self._sortterm_sql(sortterm, + fselectidx) + for sortterm in sorts) + if fneedwrap: + selection = ['T1.C%s' % i for i in xrange(len(origselection))] + sql = 'SELECT %s FROM (%s) AS T1' % (','.join(selection), sql) + finally: + select.selection = origselection + # limit / offset + limit = select.limit + if limit: + sql += '\nLIMIT %s' % limit + offset = select.offset + if offset: + sql += '\nOFFSET %s' % offset + return sql + + def _subqueries_sql(self, select, state): + for i, subquery in enumerate(select.with_): + sql = self.union_sql(subquery.query, needalias=True) + tablealias = '_T%s' % i + sql = '(%s) AS %s' % (sql, tablealias) + state.subtables[tablealias] = (0, sql) + for vref in subquery.aliases: + alias = vref.variable + alias._q_sqltable = tablealias + alias._q_sql = '%s.C%s' % (tablealias, alias.colnum) + + def _solutions_sql(self, select, solutions, distinct, needalias): + sqls = [] + for solution in solutions: + self._state.reset(solution) + # visit restriction subtree + if select.where is not None: + self._state.add_restriction(select.where.accept(self)) + sql = [self._selection_sql(select.selection, distinct, needalias)] + if self._state.restrictions: + sql.append('WHERE %s' % ' AND '.join(self._state.restrictions)) + # add required tables + assert len(self._state.actual_tables) == 1, self._state.actual_tables + tables = self._state.actual_tables[-1] + if tables: + # sort for test predictability + sql.insert(1, 'FROM %s' % ', '.join(sorted(tables))) + elif self._state.restrictions and self.dbms_helper.needs_from_clause: + sql.insert(1, 'FROM (SELECT 1) AS _T') + sqls.append('\n'.join(sql)) + if distinct: + return '\nUNION\n'.join(sqls) + else: + return '\nUNION ALL\n'.join(sqls) + + def _selection_sql(self, selected, distinct, needaliasing=False): + clause = [] + for term in selected: + sql = term.accept(self) + if needaliasing: + colalias = 'C%s' % len(clause) + clause.append('%s AS %s' % (sql, colalias)) + if isinstance(term, VariableRef): + self._state.aliases[term.name] = colalias + else: + clause.append(sql) + if distinct: + return 'SELECT DISTINCT %s' % ', '.join(clause) + return 'SELECT %s' % ', '.join(clause) + + def _sortterm_sql(self, sortterm, selectidx): + term = sortterm.term + try: + sqlterm = str(selectidx.index(str(term)) + 1) + except ValueError: + # Constant node or non selected term + sqlterm = str(term.accept(self)) + if sortterm.asc: + return sqlterm + else: + return '%s DESC' % sqlterm + + def visit_and(self, et): + """generate SQL for a AND subtree""" + res = [] + for c in et.children: + part = c.accept(self) + if part: + res.append(part) + return ' AND '.join(res) + + def visit_or(self, ou): + """generate SQL for a OR subtree""" + res = [] + for c in ou.children: + part = c.accept(self) + if part: + res.append('(%s)' % part) + if res: + if len(res) > 1: + return '(%s)' % ' OR '.join(res) + return res[0] + return '' + + def visit_not(self, node): + self._state.push_scope() + csql = node.children[0].accept(self) + sqls, tables = self._state.pop_scope() + if node in self._state.done or not csql: + # already processed or no sql generated by children + self._state.actual_tables[-1] += tables + self._state.restrictions += sqls + return csql + if isinstance(node.children[0], Exists): + assert not sqls, (sqls, str(node.stmt)) + assert not tables, (tables, str(node.stmt)) + return 'NOT %s' % csql + sqls.append(csql) + if tables: + select = 'SELECT 1 FROM %s' % ','.join(tables) + else: + select = 'SELECT 1' + if sqls: + sql = 'NOT EXISTS(%s WHERE %s)' % (select, ' AND '.join(sqls)) + else: + sql = 'NOT EXISTS(%s)' % select + return sql + + def visit_exists(self, exists): + """generate SQL name for a exists subquery""" + sqls = [] + for dummy in self._state.iter_exists_sols(exists): + sql = self._visit_exists(exists) + if sql: + sqls.append(sql) + if not sqls: + return '' + return 'EXISTS(%s)' % ' UNION '.join(sqls) + + def _visit_exists(self, exists): + self._state.push_scope() + restriction = exists.children[0].accept(self) + restrictions, tables = self._state.pop_scope() + if restriction: + restrictions.append(restriction) + restriction = ' AND '.join(restrictions) + if not restriction: + return '' + if not tables: + # XXX could leave surrounding EXISTS() in this case no? + sql = 'SELECT 1 WHERE %s' % restriction + else: + sql = 'SELECT 1 FROM %s WHERE %s' % (', '.join(tables), restriction) + return sql + + + def visit_relation(self, relation): + """generate SQL for a relation""" + rtype = relation.r_type + # don't care of type constraint statement (i.e. relation_type = 'is') + if relation.is_types_restriction(): + return '' + lhs, rhs = relation.get_parts() + rschema = self.schema.rschema(rtype) + if rschema.is_final(): + if rtype == 'eid' and lhs.variable._q_invariant and \ + lhs.variable.stinfo['constnode']: + # special case where this restriction is already generated by + # some other relation + return '' + # attribute relation + if rtype == 'has_text': + sql = self._visit_has_text_relation(relation) + else: + rhs_vars = rhs.get_nodes(VariableRef) + if rhs_vars: + # if variable(s) in the RHS + sql = self._visit_var_attr_relation(relation, rhs_vars) + else: + # no variables in the RHS + sql = self._visit_attribute_relation(relation) + if relation.neged(strict=True): + self._state.done.add(relation.parent) + sql = 'NOT (%s)' % sql + else: + if rtype == 'is' and rhs.operator == 'IS': + # special case "C is NULL" + if lhs.name in self._varmap: + lhssql = self._varmap[lhs.name] + else: + lhssql = lhs.accept(self) + return '%s%s' % (lhssql, rhs.accept(self)) + if '%s.%s' % (lhs, relation.r_type) in self._varmap: + # relation has already been processed by a previous step + return + if relation.optional: + # check it has not already been treaten (to get necessary + # information to add an outer join condition) + if relation in self._state.done: + return + # OPTIONAL relation, generate a left|right outer join + sql = self._visit_outer_join_relation(relation, rschema) + elif rschema.inlined: + sql = self._visit_inlined_relation(relation) +# elif isinstance(relation.parent, Not): +# self._state.done.add(relation.parent) +# # NOT relation +# sql = self._visit_not_relation(relation, rschema) + else: + # regular (non final) relation + sql = self._visit_relation(relation, rschema) + return sql + + def _visit_inlined_relation(self, relation): + lhsvar, _, rhsvar, rhsconst = relation_info(relation) + # we are sure here to have a lhsvar + assert lhsvar is not None + lhssql = self._inlined_var_sql(lhsvar, relation.r_type) + if isinstance(relation.parent, Not): + self._state.done.add(relation.parent) + sql = "%s IS NULL" % lhssql + if rhsvar is not None and not rhsvar._q_invariant: + sql = '(%s OR %s!=%s)' % (sql, lhssql, rhsvar.accept(self)) + return sql + if rhsconst is not None: + return '%s=%s' % (lhssql, rhsconst.accept(self)) + if isinstance(rhsvar, Variable) and not rhsvar.name in self._varmap: + # if the rhs variable is only linked to this relation, this mean we + # only want the relation to exists, eg NOT NULL in case of inlined + # relation + if len(rhsvar.stinfo['relations']) == 1 and rhsvar._q_invariant: + return '%s IS NOT NULL' % lhssql + if rhsvar._q_invariant: + return self._extra_join_sql(relation, lhssql, rhsvar) + return '%s=%s' % (lhssql, rhsvar.accept(self)) + + def _process_relation_term(self, relation, rid, termvar, termconst, relfield): + if termconst or isinstance(termvar, ColumnAlias) or not termvar._q_invariant: + termsql = termconst and termconst.accept(self) or termvar.accept(self) + yield '%s.%s=%s' % (rid, relfield, termsql) + elif termvar._q_invariant: + # if the variable is mapped, generate restriction anyway + if termvar.name in self._varmap: + termsql = termvar.accept(self) + yield '%s.%s=%s' % (rid, relfield, termsql) + extrajoin = self._extra_join_sql(relation, '%s.%s' % (rid, relfield), termvar) + if extrajoin: + yield extrajoin + + def _visit_relation(self, relation, rschema): + """generate SQL for a relation + + implements optimization 1. + """ + if relation.r_type == 'identity': + # special case "X identity Y" + lhs, rhs = relation.get_parts() + if isinstance(relation.parent, Not): + self._state.done.add(relation.parent) + return 'NOT %s%s' % (lhs.accept(self), rhs.accept(self)) + return '%s%s' % (lhs.accept(self), rhs.accept(self)) + lhsvar, lhsconst, rhsvar, rhsconst = relation_info(relation) + rid = self._relation_table(relation) + sqls = [] + sqls += self._process_relation_term(relation, rid, lhsvar, lhsconst, 'eid_from') + sqls += self._process_relation_term(relation, rid, rhsvar, rhsconst, 'eid_to') + sql = ' AND '.join(sqls) + if rschema.symetric: + sql = '(%s OR %s)' % (sql, switch_relation_field(sql)) + return sql + + def _visit_outer_join_relation(self, relation, rschema): + """ + left outer join syntax (optional=='right'): + X relation Y? + + right outer join syntax (optional=='left'): + X? relation Y + + full outer join syntaxes (optional=='both'): + X? relation Y? + + if relation is inlined: + if it's a left outer join: + -> X LEFT OUTER JOIN Y ON (X.relation=Y.eid) + elif it's a right outer join: + -> Y LEFT OUTER JOIN X ON (X.relation=Y.eid) + elif it's a full outer join: + -> X FULL OUTER JOIN Y ON (X.relation=Y.eid) + else: + if it's a left outer join: + -> X LEFT OUTER JOIN relation ON (relation.eid_from=X.eid) + LEFT OUTER JOIN Y ON (relation.eid_to=Y.eid) + elif it's a right outer join: + -> Y LEFT OUTER JOIN relation ON (relation.eid_to=Y.eid) + LEFT OUTER JOIN X ON (relation.eid_from=X.eid) + elif it's a full outer join: + -> X FULL OUTER JOIN Y ON (X.relation=Y.eid) + """ + lhsvar, lhsconst, rhsvar, rhsconst = relation_info(relation) + if relation.optional == 'right': + joinattr, restrattr = 'eid_from', 'eid_to' + else: + lhsvar, rhsvar = rhsvar, lhsvar + lhsconst, rhsconst = rhsconst, lhsconst + joinattr, restrattr = 'eid_to', 'eid_from' + if relation.optional == 'both': + outertype = 'FULL' + else: + outertype = 'LEFT' + if rschema.inlined or relation.r_type == 'identity': + self._state.done.add(relation) + t1 = self._var_table(lhsvar) + if relation.r_type == 'identity': + attr = 'eid' + else: + attr = relation.r_type + # reset lhs/rhs, we need the initial order now + lhs, rhs = relation.get_variable_parts() + if '%s.%s' % (lhs.name, attr) in self._varmap: + lhssql = self._varmap['%s.%s' % (lhs.name, attr)] + else: + lhssql = '%s.%s' % (self._var_table(lhs.variable), attr) + if not rhsvar is None: + t2 = self._var_table(rhsvar) + if t2 is None: + if rhsconst is not None: + # inlined relation with invariant as rhs + condition = '%s=%s' % (lhssql, rhsconst.accept(self)) + if relation.r_type != 'identity': + condition = '(%s OR %s IS NULL)' % (condition, lhssql) + if not lhsvar.stinfo['optrelations']: + return condition + self.add_outer_join_condition(lhsvar, t1, condition) + return + else: + condition = '%s=%s' % (lhssql, rhsconst.accept(self)) + self.add_outer_join_condition(lhsvar, t1, condition) + join = '%s OUTER JOIN %s ON (%s=%s)' % ( + outertype, self._state.tables[t2][1], lhssql, rhs.accept(self)) + self.replace_tables_by_outer_join(join, t1, t2) + return '' + lhssql = lhsconst and lhsconst.accept(self) or lhsvar.accept(self) + rhssql = rhsconst and rhsconst.accept(self) or rhsvar.accept(self) + rid = self._relation_table(relation) + if not lhsvar: + join = '' + toreplace = [] + maintable = rid + else: + join = '%s OUTER JOIN %s ON (%s.%s=%s' % ( + outertype, self._state.tables[rid][1], rid, joinattr, lhssql) + toreplace = [rid] + maintable = self._var_table(lhsvar) + if rhsconst: + join += ' AND %s.%s=%s)' % (rid, restrattr, rhssql) + else: + join += ')' + if not rhsconst: + rhstable = self._var_table(rhsvar) + if rhstable: + assert rhstable is not None, rhsvar + join += ' %s OUTER JOIN %s ON (%s.%s=%s)' % ( + outertype, self._state.tables[rhstable][1], rid, restrattr, rhssql) + toreplace.append(rhstable) + self.replace_tables_by_outer_join(join, maintable, *toreplace) + return '' + + def _visit_var_attr_relation(self, relation, rhs_vars): + """visit an attribute relation with variable(s) in the RHS + + attribute variables are used either in the selection or for + unification (eg X attr1 A, Y attr2 A). In case of selection, + nothing to do here. + """ + contextrels = {} + attrvars = self._state.attr_vars + for var in rhs_vars: + try: + contextrels[var.name] = attrvars[var.name] + except KeyError: + attrvars[var.name] = relation + if not contextrels: + relation.children[1].accept(self, contextrels) + return '' + # at least one variable is already in attr_vars, this means we have to + # generate unification expression + lhssql = self._inlined_var_sql(relation.children[0].variable, + relation.r_type) + return '%s%s' % (lhssql, relation.children[1].accept(self, contextrels)) + + def _visit_attribute_relation(self, relation): + """generate SQL for an attribute relation""" + lhs, rhs = relation.get_parts() + rhssql = rhs.accept(self) + table = self._var_table(lhs.variable) + if table is None: + assert relation.r_type == 'eid' + lhssql = lhs.accept(self) + else: + try: + lhssql = self._varmap['%s.%s' % (lhs.name, relation.r_type)] + except KeyError: + lhssql = '%s.%s' % (table, relation.r_type) + try: + if relation._q_needcast == 'TODAY': + sql = 'DATE(%s)%s' % (lhssql, rhssql) + # XXX which cast function should be used + #elif relation._q_needcast == 'NOW': + # sql = 'TIMESTAMP(%s)%s' % (lhssql, rhssql) + else: + sql = '%s%s' % (lhssql, rhssql) + except AttributeError: + sql = '%s%s' % (lhssql, rhssql) + if lhs.variable.stinfo['optrelations']: + self.add_outer_join_condition(lhs.variable, table, sql) + else: + return sql + + def _visit_has_text_relation(self, relation): + """generate SQL for a has_text relation""" + lhs, rhs = relation.get_parts() + const = rhs.children[0] + alias = self._fti_table(relation) + jointo = lhs.accept(self) + restriction = '' + lhsvar = lhs.variable + me_is_principal = lhsvar.stinfo.get('principal') is relation + if me_is_principal: + if not lhsvar.stinfo['typerels']: + # the variable is using the fti table, no join needed + jointo = None + elif not lhsvar.name in self._varmap: + # join on entities instead of etype's table to get result for + # external entities on multisources configurations + ealias = lhsvar._q_sqltable = lhsvar.name + jointo = lhsvar._q_sql = '%s.eid' % ealias + self.add_table('entities AS %s' % ealias, ealias) + if not lhsvar._q_invariant or len(lhsvar.stinfo['possibletypes']) == 1: + restriction = " AND %s.type='%s'" % (ealias, self._state.solution[lhs.name]) + else: + etypes = ','.join("'%s'" % etype for etype in lhsvar.stinfo['possibletypes']) + restriction = " AND %s.type IN (%s)" % (ealias, etypes) + if isinstance(relation.parent, Not): + self._state.done.add(relation.parent) + not_ = True + else: + not_ = False + return self.dbms_helper.fti_restriction_sql(alias, const.eval(self._args), + jointo, not_) + restriction + + def visit_comparison(self, cmp, contextrels=None): + """generate SQL for a comparaison""" + if len(cmp.children) == 2: + lhs, rhs = cmp.children + else: + lhs = None + rhs = cmp.children[0] + operator = cmp.operator + if operator in ('IS', 'LIKE', 'ILIKE'): + if operator == 'ILIKE' and not self.dbms_helper.ilike_support: + operator = ' LIKE ' + else: + operator = ' %s ' % operator + elif isinstance(rhs, Function) and rhs.name == 'IN': + assert operator == '=' + operator = ' ' + if lhs is None: + return '%s%s'% (operator, rhs.accept(self, contextrels)) + return '%s%s%s'% (lhs.accept(self, contextrels), operator, + rhs.accept(self, contextrels)) + + def visit_mathexpression(self, mexpr, contextrels=None): + """generate SQL for a mathematic expression""" + lhs, rhs = mexpr.get_parts() + # check for string concatenation + operator = mexpr.operator + try: + if mexpr.operator == '+' and mexpr.get_type(self._state.solution, self._args) == 'String': + operator = '||' + except CoercionError: + pass + return '(%s %s %s)'% (lhs.accept(self, contextrels), operator, + rhs.accept(self, contextrels)) + + def visit_function(self, func, contextrels=None): + """generate SQL name for a function""" + # function_description will check function is supported by the backend + self.dbms_helper.function_description(func.name) + return '%s(%s)' % (func.name, ', '.join(c.accept(self, contextrels) + for c in func.children)) + + def visit_constant(self, constant, contextrels=None): + """generate SQL name for a constant""" + value = constant.value + if constant.type is None: + return 'NULL' + if constant.type == 'Int' and isinstance(constant.parent, SortTerm): + return constant.value + if constant.type in ('Date', 'Datetime'): + rel = constant.relation() + if rel is not None: + rel._q_needcast = value + return self.keyword_map[value]() + if constant.type == 'Substitute': + _id = constant.value + if isinstance(_id, unicode): + _id = _id.encode() + else: + _id = str(id(constant)).replace('-', '', 1) + if isinstance(value, unicode): + value = value.encode(self.dbencoding) + self._query_attrs[_id] = value + return '%%(%s)s' % _id + + def visit_variableref(self, variableref, contextrels=None): + """get the sql name for a variable reference""" + # use accept, .variable may be a variable or a columnalias + return variableref.variable.accept(self, contextrels) + + def visit_columnalias(self, colalias, contextrels=None): + """get the sql name for a subquery column alias""" + if colalias.name in self._varmap: + sql = self._varmap[colalias.name] + self.add_table(sql.split('.', 1)[0]) + return sql + return colalias._q_sql + + def visit_variable(self, variable, contextrels=None): + """get the table name and sql string for a variable""" + if contextrels is None and variable.name in self._state.done: + if self._in_wrapping_query: + return 'T1.%s' % self._state.aliases[variable.name] + return variable._q_sql + self._state.done.add(variable.name) + vtablename = None + if contextrels is None and variable.name in self._varmap: + sql, vtablename = self._var_info(variable) + elif variable.stinfo['attrvar']: + # attribute variable (systematically used in rhs of final + # relation(s)), get table name and sql from any rhs relation + sql = self._linked_var_sql(variable, contextrels) + elif variable._q_invariant: + # since variable is invariant, we know we won't found final relation + principal = variable.stinfo['principal'] + if principal is None: + vtablename = variable.name + self.add_table('entities AS %s' % variable.name, vtablename) + sql = '%s.eid' % vtablename + if variable.stinfo['typerels']: + # add additional restriction on entities.type column + pts = variable.stinfo['possibletypes'] + if len(pts) == 1: + etype = iter(variable.stinfo['possibletypes']).next() + restr = "%s.type='%s'" % (vtablename, etype) + else: + etypes = ','.join("'%s'" % et for et in pts) + restr = '%s.type IN (%s)' % (vtablename, etypes) + self._state.add_restriction(restr) + elif principal.r_type == 'has_text': + sql = '%s.%s' % (self._fti_table(principal), + self.dbms_helper.fti_uid_attr) + elif principal in variable.stinfo['rhsrelations']: + if self.schema.rschema(principal.r_type).inlined: + sql = self._linked_var_sql(variable, contextrels) + else: + sql = '%s.eid_to' % self._relation_table(principal) + else: + sql = '%s.eid_from' % self._relation_table(principal) + else: + # standard variable: get table name according to etype and use .eid + # attribute + sql, vtablename = self._var_info(variable) + variable._q_sqltable = vtablename + variable._q_sql = sql + return sql + + # various utilities ####################################################### + + def _extra_join_sql(self, relation, sql, var): + # if rhs var is invariant, and this relation is not its principal, + # generate extra join + try: + if not var.stinfo['principal'] is relation: + # need a predicable result for tests + return '%s=%s' % tuple(sorted((sql, var.accept(self)))) + except KeyError: + # no principal defined, relation is necessarily the principal and + # so nothing to return here + pass + return '' + + def _var_info(self, var): + # if current var or one of its attribute is selected , it *must* + # appear in the toplevel's FROM even if we're currently visiting + # a EXISTS node + if var.sqlscope is var.stmt: + scope = 0 + else: + scope = -1 + try: + sql = self._varmap[var.name] + table = sql.split('.', 1)[0] + if scope == -1: + scope = self._varmap_table_scope(var.stmt, table) + self.add_table(table, scope=scope) + except KeyError: + etype = self._state.solution[var.name] + # XXX this check should be moved in rql.stcheck + if self.schema.eschema(etype).is_final(): + raise BadRQLQuery(var.stmt.root) + table = var.name + sql = '%s.eid' % table + self.add_table('%s AS %s' % (etype, table), table, scope=scope) + return sql, table + + def _inlined_var_sql(self, var, rtype): + try: + sql = self._varmap['%s.%s' % (var.name, rtype)] + scope = var.sqlscope is var.stmt and 0 or -1 + self.add_table(sql.split('.', 1)[0], scope=scope) + except KeyError: + sql = '%s.%s' % (self._var_table(var), rtype) + #self._state.done.add(var.name) + return sql + + def _linked_var_sql(self, variable, contextrels=None): + if contextrels is None: + try: + return self._varmap[variable.name] + except KeyError: + pass + rel = (contextrels and contextrels.get(variable.name) or + variable.stinfo.get('principal') or + iter(variable.stinfo['rhsrelations']).next()) + linkedvar = rel.children[0].variable + if rel.r_type == 'eid': + return linkedvar.accept(self) + if isinstance(linkedvar, ColumnAlias): + raise BadRQLQuery('variable %s should be selected by the subquery' + % variable.name) + try: + sql = self._varmap['%s.%s' % (linkedvar.name, rel.r_type)] + except KeyError: + linkedvar.accept(self) + sql = '%s.%s' % (linkedvar._q_sqltable, rel.r_type) + return sql + + # tables handling ######################################################### + + def alias_and_add_table(self, tablename): + alias = '%s%s' % (tablename, self._state.count) + self._state.count += 1 + self.add_table('%s AS %s' % (tablename, alias), alias) + return alias + + def add_table(self, table, key=None, scope=-1): + if key is None: + key = table + if key in self._state.tables: + return + self._state.tables[key] = (len(self._state.actual_tables) - 1, table) + self._state.actual_tables[scope].append(table) + + def replace_tables_by_outer_join(self, substitute, lefttable, *tables): + for table in tables: + try: + scope, alias = self._state.tables[table] + self._state.actual_tables[scope].remove(alias) + except ValueError: # huum, not sure about what should be done here + msg = "%s already used in an outer join, don't know what to do!" + raise Exception(msg % table) + try: + tablealias = self._state.outer_tables[lefttable] + actualtables = self._state.actual_tables[-1] + except KeyError: + tablescope, tablealias = self._state.tables[lefttable] + actualtables = self._state.actual_tables[tablescope] + outerjoin = '%s %s' % (tablealias, substitute) + self._update_outer_tables(lefttable, actualtables, tablealias, outerjoin) + for table in tables: + self._state.outer_tables[table] = outerjoin + + def add_outer_join_condition(self, var, table, condition): + try: + tablealias = self._state.outer_tables[table] + actualtables = self._state.actual_tables[-1] + except KeyError: + for rel in var.stinfo['optrelations']: + self.visit_relation(rel) + assert self._state.outer_tables + self.add_outer_join_condition(var, table, condition) + return + before, after = tablealias.split(' AS %s ' % table, 1) + beforep, afterp = after.split(')', 1) + outerjoin = '%s AS %s %s AND %s) %s' % (before, table, beforep, + condition, afterp) + self._update_outer_tables(table, actualtables, tablealias, outerjoin) + + def _update_outer_tables(self, table, actualtables, oldalias, newalias): + actualtables.remove(oldalias) + actualtables.append(newalias) + # some tables which have already been used as outer table and replaced + # by may not be reused here, though their associated value + # in the outer_tables dict has to be updated as well + for table, outerexpr in self._state.outer_tables.iteritems(): + if outerexpr == oldalias: + self._state.outer_tables[table] = newalias + self._state.outer_tables[table] = newalias + + def _var_table(self, var): + var.accept(self)#.visit_variable(var) + return var._q_sqltable + + def _relation_table(self, relation): + """return the table alias used by the given relation""" + if relation in self._state.done: + return relation._q_sqltable + assert not self.schema.rschema(relation.r_type).is_final(), relation.r_type + rid = 'rel_%s%s' % (relation.r_type, self._state.count) + # relation's table is belonging to the root scope if it is the principal + # table of one of it's variable and if that variable belong's to parent + # scope + for varref in relation.iget_nodes(VariableRef): + var = varref.variable + if isinstance(var, ColumnAlias): + scope = 0 + break + # XXX may have a principal without being invariant for this generation, + # not sure this is a pb or not + if var.stinfo.get('principal') is relation and var.sqlscope is var.stmt: + scope = 0 + break + else: + scope = -1 + self._state.count += 1 + self.add_table('%s_relation AS %s' % (relation.r_type, rid), rid, scope=scope) + relation._q_sqltable = rid + self._state.done.add(relation) + return rid + + def _fti_table(self, relation): + if relation in self._state.done: + try: + return relation._q_sqltable + except AttributeError: + pass + self._state.done.add(relation) + alias = self.alias_and_add_table(self.dbms_helper.fti_table) + relation._q_sqltable = alias + return alias + + def _varmap_table_scope(self, select, table): + """since a varmap table may be used for multiple variable, its scope is + the most outer scope of each variables + """ + scope = -1 + for varname, alias in self._varmap.iteritems(): + # check '.' in varname since there are 'X.attribute' keys in varmap + if not '.' in varname and alias.split('.', 1)[0] == table: + if select.defined_vars[varname].sqlscope is select: + return 0 + return scope