cleanup/document/refactor
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Thu, 23 Jul 2009 12:08:39 +0200
changeset 2430 7d9ed6c740ec
parent 2429 d3f4bffb57d0
child 2431 93c061eac647
cleanup/document/refactor
spa2rql.py
--- a/spa2rql.py	Thu Jul 23 11:23:12 2009 +0200
+++ b/spa2rql.py	Thu Jul 23 12:08:39 2009 +0200
@@ -15,7 +15,11 @@
 
 class UnsupportedQuery(Exception): pass
 
+
 class QueryInfo(object):
+    """wrapper class containing necessary information to generate a RQL query
+    from a sparql syntax tree
+    """
     def __init__(self, sparqlst):
         self.sparqlst = sparqlst
         if sparqlst.selected == ['*']:
@@ -23,10 +27,12 @@
         else:
             self.selection = [var.name.upper() for var in sparqlst.selected]
         self.possible_types = {}
+        self.infer_types_info = []
         self.union_params = []
         self.restrictions = []
 
     def finalize(self):
+        """return corresponding rql query"""
         for varname, ptypes in self.possible_types.iteritems():
             if len(ptypes) == 1:
                 self.restrictions.append('%s is %s' % (varname, iter(ptypes).next()))
@@ -54,13 +60,72 @@
         return ' UNION '.join(rqls)
 
     def set_possible_types(self, var, varpossibletypes):
+        """set/restrict possible types for the given variable.
+
+        :return: True if something changed, else False.
+        :raise: TypeResolverException if no more type allowed
+        """
         varpossibletypes = set(varpossibletypes)
         try:
-            self.possible_types[var] &= varpossibletypes
-            if not self.possible_types[var]:
+            ctypes = self.possible_types[var]
+            nbctypes = len(ctypes)
+            ctypes &= varpossibletypes
+            if not ctypes:
                 raise TypeResolverException()
+            return len(ctypes) != nbctypes
         except KeyError:
             self.possible_types[var] = varpossibletypes
+            return True
+
+    def infer_types(self):
+        # XXX should use something similar to rql.analyze for proper type inference
+        modified = True
+        # loop to infer types until nothing changed
+        while modified:
+            modified = False
+            for yams_predicates, subjvar, obj in self.infer_types_info:
+                nbchoices = len(yams_predicates)
+                # get possible types for the subject variable, according to the
+                # current predicate
+                svptypes = set(s for s, r, o in yams_predicates)
+                if not '*' in svptypes:
+                    if self.set_possible_types(subjvar, svptypes):
+                        modified = True
+                # restrict predicates according to allowed subject var types
+                if subjvar in self.possible_types:
+                    yams_predicates = [(s, r, o) for s, r, o in yams_predicates
+                                       if s == '*' or s in self.possible_types[subjvar]]
+                if isinstance(obj, ast.SparqlVar):
+                    # make a valid rql var name
+                    objvar = obj.name.upper()
+                    # get possible types for the object variable, according to
+                    # the current predicate
+                    ovptypes = set(o for s, r, o in yams_predicates)
+                    if not '*' in ovptypes:
+                        if self.set_possible_types(objvar, ovptypes):
+                            modified = True
+                    # restrict predicates according to allowed object var types
+                    if objvar in self.possible_types:
+                        yams_predicates = [(s, r, o) for s, r, o in yams_predicates
+                                           if o == '*' or o in self.possible_types[objvar]]
+                # ensure this still makes sense
+                if not yams_predicates:
+                    raise TypeResolverException()
+                if len(yams_predicates) != nbchoices:
+                    modified = True
+        # now, for each predicate
+        for yams_predicates, subjvar, obj in self.infer_types_info:
+            rel = yams_predicates[0]
+            objvar = obj.name.upper()
+            # if there are several yams relation type equivalences, we will have
+            # to generate several unioned rql queries
+            for s, r, o in yams_predicates[1:]:
+                if r != rel[1]:
+                    self.union_params.append((yams_predicates, subjvar, objvar))
+                    break
+            else:
+                # else we can simply add it to base rql restrictions
+                self.restrictions.append('%s %s %s' % (subjvar, rel[1], objvar))
 
 
 class Sparql2rqlTranslator(object):
@@ -75,39 +140,28 @@
         for subj, predicate, obj in sparqlst.where:
             if not isinstance(subj, ast.SparqlVar):
                 raise UnsupportedQuery()
+            # make a valid rql var name
             subjvar = subj.name.upper()
-            if predicate == ('', 'a'): # special 'is' relation
+            if predicate == ('', 'a'):
+                # special 'is' relation
                 if not isinstance(obj, tuple):
                     raise UnsupportedQuery()
+                # restrict possible types for the subject variable
                 qi.set_possible_types(
                     subjvar, xy.yeq(':'.join(obj), isentity=True))
             else:
+                # 'regular' relation (i.e. not 'is')
                 if not isinstance(predicate, tuple):
                     raise UnsupportedQuery()
-                releq = xy.yeq(':'.join(predicate))
-                svptypes = set(s for s, r, o in releq)
-                if not '*' in svptypes:
-                    qi.set_possible_types(subjvar, svptypes)
-                if subjvar in qi.possible_types:
-                    releq = [(s, r, o) for s, r, o in releq
-                             if s == '*' or s in qi.possible_types[subjvar]]
+                # list of 3-tuples
+                #   (yams etype (subject), yams rtype, yams etype (object))
+                # where subject / object entity type may be '*' if not specified
+                yams_predicates = xy.yeq(':'.join(predicate))
+                qi.infer_types_info.append((yams_predicates, subjvar, obj))
                 if isinstance(obj, ast.SparqlVar):
+                    # make a valid rql var name
                     objvar = obj.name.upper()
-                    ovptypes = set(o for s, r, o in releq)
-                    if not '*' in ovptypes:
-                        qi.set_possible_types(objvar, ovptypes)
-                    if objvar in qi.possible_types:
-                        releq = [(s, r, o) for s, r, o in releq
-                                 if o == '*' or o in qi.possible_types[objvar]]
                 else:
                     raise UnsupportedQuery()
-                rel = releq[0]
-                for s, r, o in releq[1:]:
-                    if r != rel[1]:
-                        qi.union_params.append((releq, subjvar, objvar))
-                        break
-                else:
-                    qi.restrictions.append('%s %s %s' % (subj.name.upper(),
-                                                         rel[1],
-                                                         obj.name.upper()))
+        qi.infer_types()
         return qi