spa2rql.py
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Wed, 22 Jul 2009 19:42:35 +0200
changeset 2427 3e0ef847a546
parent 2422 96da7dc42eb5
child 2430 7d9ed6c740ec
permissions -rw-r--r--
distinct / reduced support
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2422
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     1
"""SPARQL -> RQL translator
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     2
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     3
:organization: Logilab
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     4
:copyright: 2009 LOGILAB S.A. (Paris, FRANCE), license is LGPL v2.
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     5
:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     6
:license: GNU Lesser General Public License, v2.1 - http://www.gnu.org/licenses
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     7
"""
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     8
from logilab.common import make_domains
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     9
from rql import TypeResolverException
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    10
from fyzz.yappsparser import parse
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    11
from fyzz import ast
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    12
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    13
from cubicweb.xy import xy
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    14
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    15
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    16
class UnsupportedQuery(Exception): pass
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    17
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    18
class QueryInfo(object):
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    19
    def __init__(self, sparqlst):
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    20
        self.sparqlst = sparqlst
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    21
        if sparqlst.selected == ['*']:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    22
            self.selection = [var.upper() for var in sparqlst.variables]
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    23
        else:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    24
            self.selection = [var.name.upper() for var in sparqlst.selected]
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    25
        self.possible_types = {}
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    26
        self.union_params = []
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    27
        self.restrictions = []
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    28
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    29
    def finalize(self):
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    30
        for varname, ptypes in self.possible_types.iteritems():
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    31
            if len(ptypes) == 1:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    32
                self.restrictions.append('%s is %s' % (varname, iter(ptypes).next()))
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    33
        unions = []
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    34
        for releq, subjvar, objvar in self.union_params:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    35
            thisunions = []
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    36
            for st, rt, ot in releq:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    37
                thisunions.append(['%s %s %s' % (subjvar, rt, objvar)])
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    38
                if st != '*':
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    39
                    thisunions[-1].append('%s is %s' % (subjvar, st))
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    40
                if ot != '*':
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    41
                    thisunions[-1].append('%s is %s' % (objvar, ot))
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    42
            if not unions:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    43
                unions = thisunions
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    44
            else:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    45
                unions = zip(*make_domains([unions, thisunions]))
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    46
        baserql = 'Any %s WHERE %s' % (', '.join(self.selection),
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    47
                                       ', '.join(self.restrictions))
2427
3e0ef847a546 distinct / reduced support
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 2422
diff changeset
    48
        if self.sparqlst.distinct:
3e0ef847a546 distinct / reduced support
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 2422
diff changeset
    49
            baserql = 'DISTINCT ' + baserql
2422
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    50
        if not unions:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    51
            return baserql
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    52
        rqls = ['(%s, %s)' % (baserql, ', '.join(unionrestrs))
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    53
                for unionrestrs in unions]
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    54
        return ' UNION '.join(rqls)
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    55
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    56
    def set_possible_types(self, var, varpossibletypes):
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    57
        varpossibletypes = set(varpossibletypes)
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    58
        try:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    59
            self.possible_types[var] &= varpossibletypes
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    60
            if not self.possible_types[var]:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    61
                raise TypeResolverException()
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    62
        except KeyError:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    63
            self.possible_types[var] = varpossibletypes
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    64
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    65
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    66
class Sparql2rqlTranslator(object):
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    67
    def __init__(self, yschema):
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    68
        self.yschema = yschema
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    69
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    70
    def translate(self, sparql):
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    71
        sparqlst = parse(sparql)
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    72
        if sparqlst.type != 'select':
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    73
            raise UnsupportedQuery()
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    74
        qi = QueryInfo(sparqlst)
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    75
        for subj, predicate, obj in sparqlst.where:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    76
            if not isinstance(subj, ast.SparqlVar):
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    77
                raise UnsupportedQuery()
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    78
            subjvar = subj.name.upper()
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    79
            if predicate == ('', 'a'): # special 'is' relation
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    80
                if not isinstance(obj, tuple):
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    81
                    raise UnsupportedQuery()
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    82
                qi.set_possible_types(
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    83
                    subjvar, xy.yeq(':'.join(obj), isentity=True))
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    84
            else:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    85
                if not isinstance(predicate, tuple):
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    86
                    raise UnsupportedQuery()
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    87
                releq = xy.yeq(':'.join(predicate))
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    88
                svptypes = set(s for s, r, o in releq)
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    89
                if not '*' in svptypes:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    90
                    qi.set_possible_types(subjvar, svptypes)
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    91
                if subjvar in qi.possible_types:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    92
                    releq = [(s, r, o) for s, r, o in releq
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    93
                             if s == '*' or s in qi.possible_types[subjvar]]
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    94
                if isinstance(obj, ast.SparqlVar):
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    95
                    objvar = obj.name.upper()
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    96
                    ovptypes = set(o for s, r, o in releq)
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    97
                    if not '*' in ovptypes:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    98
                        qi.set_possible_types(objvar, ovptypes)
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    99
                    if objvar in qi.possible_types:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   100
                        releq = [(s, r, o) for s, r, o in releq
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   101
                                 if o == '*' or o in qi.possible_types[objvar]]
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   102
                else:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   103
                    raise UnsupportedQuery()
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   104
                rel = releq[0]
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   105
                for s, r, o in releq[1:]:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   106
                    if r != rel[1]:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   107
                        qi.union_params.append((releq, subjvar, objvar))
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   108
                        break
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   109
                else:
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   110
                    qi.restrictions.append('%s %s %s' % (subj.name.upper(),
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   111
                                                         rel[1],
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   112
                                                         obj.name.upper()))
96da7dc42eb5 quick and dirty support from simple sparql queries + base ui
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   113
        return qi