[rql2sql] more cases fixed where something is wrongly added to GROUPBY, causing unexpected results for the query stable
authorSylvain Thénault <sylvain.thenault@logilab.fr>
Wed, 29 Jun 2011 18:27:23 +0200
branchstable
changeset 7579 5a610b34d2d2
parent 7576 1b7fa4df1f83
child 7580 328542c4fdc8
child 7585 8bad94040b1b
[rql2sql] more cases fixed where something is wrongly added to GROUPBY, causing unexpected results for the query
server/sources/rql2sql.py
server/test/unittest_rql2sql.py
--- a/server/sources/rql2sql.py	Wed Jun 29 14:05:14 2011 +0200
+++ b/server/sources/rql2sql.py	Wed Jun 29 18:27:23 2011 +0200
@@ -247,28 +247,32 @@
                                       table + '.eid_from')
     return switchedsql.replace('__eid_from__', table + '.eid_to')
 
-def sort_term_selection(sorts, selectedidx, rqlst, groups):
+def sort_term_selection(sorts, rqlst, groups):
     # XXX beurk
     if isinstance(rqlst, list):
         def append(term):
             rqlst.append(term)
+        selectionidx = set(str(term) for term in rqlst)
     else:
         def append(term):
             rqlst.selection.append(term.copy(rqlst))
+        selectionidx = set(str(term) for term in rqlst.selection)
+
     for sortterm in sorts:
         term = sortterm.term
-        if not isinstance(term, Constant) and not str(term) in selectedidx:
-            selectedidx.append(str(term))
+        if not isinstance(term, Constant) and not str(term) in selectionidx:
+            selectionidx.add(str(term))
             append(term)
             if groups:
                 for vref in term.iget_nodes(VariableRef):
                     if not vref in groups:
                         groups.append(vref)
 
-def fix_selection_and_group(rqlst, selectedidx, needwrap, selectsortterms,
+def fix_selection_and_group(rqlst, needwrap, selectsortterms,
                             sorts, groups, having):
     if selectsortterms and sorts:
-        sort_term_selection(sorts, selectedidx, rqlst, not needwrap and groups)
+        sort_term_selection(sorts, rqlst, not needwrap and groups)
+    groupvrefs = [vref for term in groups for vref in term.iget_nodes(VariableRef)]
     if sorts and groups:
         # when a query is grouped, ensure sort terms are grouped as well
         for sortterm in sorts:
@@ -277,19 +281,22 @@
                     (isinstance(term, Function) and
                      get_func_descr(term.name).aggregat)):
                 for vref in term.iget_nodes(VariableRef):
-                    if not vref in groups:
+                    if not vref in groupvrefs:
                         groups.append(vref)
-    if needwrap:
+                        groupvrefs.append(vref)
+    if needwrap and (groups or having):
+        selectedidx = set(vref.name for term in rqlst.selection
+                          for vref in term.get_nodes(VariableRef))
         if groups:
-            for vref in groups:
-                if not vref.name in selectedidx:
-                    selectedidx.append(vref.name)
+            for vref in groupvrefs:
+                if vref.name not in selectedidx:
+                    selectedidx.add(vref.name)
                     rqlst.selection.append(vref)
         if having:
             for term in having:
                 for vref in term.iget_nodes(VariableRef):
-                    if not vref.name in selectedidx:
-                        selectedidx.append(vref.name)
+                    if vref.name not in selectedidx:
+                        selectedidx.add(vref.name)
                         rqlst.selection.append(vref)
 
 def iter_mapped_var_sels(stmt, variable):
@@ -806,23 +813,16 @@
         # treat subqueries
         self._subqueries_sql(select, state)
         # generate sql for this select node
-        selectidx = [str(term) for term in select.selection]
         if needwrap:
             outerselection = origselection[:]
             if sorts and selectsortterms:
-                outerselectidx = [str(term) for term in outerselection]
                 if distinct:
-                    sort_term_selection(sorts, outerselectidx,
-                                        outerselection, groups)
-            else:
-                outerselectidx = selectidx[:]
-        fix_selection_and_group(select, selectidx, needwrap,
-                                selectsortterms, sorts, groups, having)
+                    sort_term_selection(sorts, outerselection, groups)
+        fix_selection_and_group(select, needwrap, selectsortterms,
+                                sorts, groups, having)
         if needwrap:
-            fselectidx = outerselectidx
             fneedwrap = len(outerselection) != len(origselection)
         else:
-            fselectidx = selectidx
             fneedwrap = len(select.selection) != len(origselection)
         if fneedwrap:
             needalias = True
@@ -854,8 +854,12 @@
             # sort
             if sorts:
                 sqlsortterms = []
+                if needwrap:
+                    selectidx = [str(term) for term in outerselection]
+                else:
+                    selectidx = [str(term) for term in select.selection]
                 for sortterm in sorts:
-                    _term = self._sortterm_sql(sortterm, fselectidx)
+                    _term = self._sortterm_sql(sortterm, selectidx)
                     if _term is not None:
                         sqlsortterms.append(_term)
                 if sqlsortterms:
--- a/server/test/unittest_rql2sql.py	Wed Jun 29 14:05:14 2011 +0200
+++ b/server/test/unittest_rql2sql.py	Wed Jun 29 18:27:23 2011 +0200
@@ -1529,14 +1529,14 @@
 ORDER BY ts_rank(appears0.words, to_tsquery('default', 'hip&hop&momo'))*appears0.weight"""),
 
             ('Any X ORDERBY FTIRANK(X) WHERE X has_text "toto tata", X name "tutu", X is IN (Basket,Folder)',
-             """SELECT T1.C0 FROM (SELECT _X.cw_eid AS C0, ts_rank(appears0.words, to_tsquery('default', 'toto&tata'))*appears0.weight AS C1
+             """SELECT _X.cw_eid
 FROM appears AS appears0, cw_Basket AS _X
 WHERE appears0.words @@ to_tsquery('default', 'toto&tata') AND appears0.uid=_X.cw_eid AND _X.cw_name=tutu
 UNION ALL
-SELECT _X.cw_eid AS C0, ts_rank(appears0.words, to_tsquery('default', 'toto&tata'))*appears0.weight AS C1
+SELECT _X.cw_eid
 FROM appears AS appears0, cw_Folder AS _X
 WHERE appears0.words @@ to_tsquery('default', 'toto&tata') AND appears0.uid=_X.cw_eid AND _X.cw_name=tutu
-ORDER BY 2) AS T1"""),
+ORDER BY ts_rank(appears0.words, to_tsquery('default', 'toto&tata'))*appears0.weight"""),
 
             ('Personne X ORDERBY FTIRANK(X),FTIRANK(S) WHERE X has_text %(text)s, X travaille S, S has_text %(text)s',
              """SELECT _X.eid
@@ -1643,6 +1643,16 @@
 WHERE rel_owned_by0.eid_from=_G.cw_eid AND rel_owned_by0.eid_to=1122
 GROUP BY _G.cw_eid''')
 
+    def test_groupby_orderby_insertion_dont_modify_intention(self):
+        self._check('Any YEAR(XECT)*100+MONTH(XECT), COUNT(X),SUM(XCE),AVG(XSCT-XECT) '
+                    'GROUPBY YEAR(XECT),MONTH(XECT) ORDERBY 1 '
+                    'WHERE X creation_date XSCT, X modification_date XECT, '
+                    'X ordernum XCE, X is CWAttribute',
+                    '''SELECT ((CAST(EXTRACT(YEAR from _X.cw_modification_date) AS INTEGER) * 100) + CAST(EXTRACT(MONTH from _X.cw_modification_date) AS INTEGER)), COUNT(_X.cw_eid), SUM(_X.cw_ordernum), AVG((_X.cw_creation_date - _X.cw_modification_date))
+FROM cw_CWAttribute AS _X
+GROUP BY CAST(EXTRACT(YEAR from _X.cw_modification_date) AS INTEGER),CAST(EXTRACT(MONTH from _X.cw_modification_date) AS INTEGER)
+ORDER BY 1'''),
+
 
 class SqlServer2005SQLGeneratorTC(PostgresSQLGeneratorTC):
     backend = 'sqlserver2005'
@@ -1749,6 +1759,15 @@
         for t in self._parse(WITH_LIMIT):# + ADVANCED_WITH_LIMIT_OR_ORDERBY):
             yield t
 
+    def test_groupby_orderby_insertion_dont_modify_intention(self):
+        self._check('Any YEAR(XECT)*100+MONTH(XECT), COUNT(X),SUM(XCE),AVG(XSCT-XECT) '
+                    'GROUPBY YEAR(XECT),MONTH(XECT) ORDERBY 1 '
+                    'WHERE X creation_date XSCT, X modification_date XECT, '
+                    'X ordernum XCE, X is CWAttribute',
+                    '''SELECT ((YEAR(_X.cw_modification_date) * 100) + MONTH(_X.cw_modification_date)), COUNT(_X.cw_eid), SUM(_X.cw_ordernum), AVG((_X.cw_creation_date - _X.cw_modification_date))
+FROM cw_CWAttribute AS _X
+GROUP BY YEAR(_X.cw_modification_date),MONTH(_X.cw_modification_date)
+ORDER BY 1'''),
 
 
 class SqliteSQLGeneratorTC(PostgresSQLGeneratorTC):
@@ -1880,6 +1899,16 @@
 FROM cw_CWUser AS _X
 WHERE ((YEAR(_X.cw_creation_date)=2010) OR (_X.cw_creation_date IS NULL))''')
 
+    def test_groupby_orderby_insertion_dont_modify_intention(self):
+        self._check('Any YEAR(XECT)*100+MONTH(XECT), COUNT(X),SUM(XCE),AVG(XSCT-XECT) '
+                    'GROUPBY YEAR(XECT),MONTH(XECT) ORDERBY 1 '
+                    'WHERE X creation_date XSCT, X modification_date XECT, '
+                    'X ordernum XCE, X is CWAttribute',
+                    '''SELECT ((YEAR(_X.cw_modification_date) * 100) + MONTH(_X.cw_modification_date)), COUNT(_X.cw_eid), SUM(_X.cw_ordernum), AVG((_X.cw_creation_date - _X.cw_modification_date))
+FROM cw_CWAttribute AS _X
+GROUP BY YEAR(_X.cw_modification_date),MONTH(_X.cw_modification_date)
+ORDER BY 1'''),
+
 
 
 class MySQLGenerator(PostgresSQLGeneratorTC):
@@ -1976,6 +2005,16 @@
 FROM (SELECT 1) AS _T
 WHERE NOT (EXISTS(SELECT 1 FROM in_group_relation AS rel_in_group0))''')
 
+    def test_groupby_orderby_insertion_dont_modify_intention(self):
+        self._check('Any YEAR(XECT)*100+MONTH(XECT), COUNT(X),SUM(XCE),AVG(XSCT-XECT) '
+                    'GROUPBY YEAR(XECT),MONTH(XECT) ORDERBY 1 '
+                    'WHERE X creation_date XSCT, X modification_date XECT, '
+                    'X ordernum XCE, X is CWAttribute',
+                    '''SELECT ((EXTRACT(YEAR from _X.cw_modification_date) * 100) + EXTRACT(MONTH from _X.cw_modification_date)), COUNT(_X.cw_eid), SUM(_X.cw_ordernum), AVG((_X.cw_creation_date - _X.cw_modification_date))
+FROM cw_CWAttribute AS _X
+GROUP BY EXTRACT(YEAR from _X.cw_modification_date),EXTRACT(MONTH from _X.cw_modification_date)
+ORDER BY 1'''),
+
 
 class removeUnsusedSolutionsTC(TestCase):
     def test_invariant_not_varying(self):