0
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
1 |
"""a query preprocesser to handle quick search shortcuts for cubicweb |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
2 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
3 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
4 |
:organization: Logilab |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
5 |
:copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
6 |
:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
7 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
8 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
9 |
__docformat__ = "restructuredtext en" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
10 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
11 |
import re |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
12 |
from logging import getLogger |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
13 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
14 |
from rql import RQLSyntaxError, BadRQLQuery, parse |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
15 |
from rql.nodes import Relation |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
16 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
17 |
from cubicweb import Unauthorized |
661
|
18 |
from cubicweb.common.appobject import Component |
0
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
19 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
20 |
LOGGER = getLogger('cubicweb.magicsearch') |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
21 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
22 |
def _get_approriate_translation(translations_found, eschema): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
23 |
"""return the first (should be the only one) possible translation according |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
24 |
to the given entity type |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
25 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
26 |
# get the list of all attributes / relations for this kind of entity |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
27 |
existing_relations = set(eschema.subject_relations()) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
28 |
consistent_translations = translations_found & existing_relations |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
29 |
if len(consistent_translations) == 0: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
30 |
return None |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
31 |
return consistent_translations.pop() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
32 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
33 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
34 |
def translate_rql_tree(rqlst, translations, schema): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
35 |
"""Try to translate each relation in the RQL syntax tree |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
36 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
37 |
:type rqlst: `rql.stmts.Statement` |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
38 |
:param rqlst: the RQL syntax tree |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
39 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
40 |
:type translations: dict |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
41 |
:param translations: the reverted l10n dict |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
42 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
43 |
:type schema: `cubicweb.schema.Schema` |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
44 |
:param schema: the application's schema |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
45 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
46 |
# var_types is used as a map : var_name / var_type |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
47 |
vartypes = {} |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
48 |
# ambiguous_nodes is used as a map : relation_node / (var_name, available_translations) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
49 |
ambiguous_nodes = {} |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
50 |
# For each relation node, check if it's a localized relation name |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
51 |
# If it's a localized name, then use the original relation name, else |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
52 |
# keep the existing relation name |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
53 |
for relation in rqlst.get_nodes(Relation): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
54 |
rtype = relation.r_type |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
55 |
lhs, rhs = relation.get_variable_parts() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
56 |
if rtype == 'is': |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
57 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
58 |
etype = translations[rhs.value] |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
59 |
rhs.value = etype |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
60 |
except KeyError: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
61 |
# If no translation found, leave the entity type as is |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
62 |
etype = rhs.value |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
63 |
# Memorize variable's type |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
64 |
vartypes[lhs.name] = etype |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
65 |
else: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
66 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
67 |
translation_set = translations[rtype] |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
68 |
except KeyError: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
69 |
pass # If no translation found, leave the relation type as is |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
70 |
else: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
71 |
# Only one possible translation, no ambiguity |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
72 |
if len(translation_set) == 1: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
73 |
relation.r_type = iter(translations[rtype]).next() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
74 |
# More than 1 possible translation => resolve it later |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
75 |
else: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
76 |
ambiguous_nodes[relation] = (lhs.name, translation_set) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
77 |
if ambiguous_nodes: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
78 |
resolve_ambiguities(vartypes, ambiguous_nodes, schema) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
79 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
80 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
81 |
def resolve_ambiguities(var_types, ambiguous_nodes, schema): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
82 |
"""Tries to resolve remaining ambiguities for translation |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
83 |
/!\ An ambiguity is when two different string can be localized with |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
84 |
the same string |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
85 |
A simple example: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
86 |
- 'name' in a company context will be localized as 'nom' in French |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
87 |
- but ... 'surname' will also be localized as 'nom' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
88 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
89 |
:type var_types: dict |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
90 |
:param var_types: a map : var_name / var_type |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
91 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
92 |
:type ambiguous_nodes: dict |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
93 |
:param ambiguous_nodes: a map : relation_node / (var_name, available_translations) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
94 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
95 |
:type schema: `cubicweb.schema.Schema` |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
96 |
:param schema: the application's schema |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
97 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
98 |
# Now, try to resolve ambiguous translations |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
99 |
for relation, (var_name, translations_found) in ambiguous_nodes.items(): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
100 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
101 |
vartype = var_types[var_name] |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
102 |
except KeyError: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
103 |
continue |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
104 |
# Get schema for this entity type |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
105 |
eschema = schema.eschema(vartype) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
106 |
rtype = _get_approriate_translation(translations_found, eschema) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
107 |
if rtype is None: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
108 |
continue |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
109 |
relation.r_type = rtype |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
110 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
111 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
112 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
113 |
QUOTED_SRE = re.compile(r'(.*?)(["\'])(.+?)\2') |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
114 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
115 |
TRANSLATION_MAPS = {} |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
116 |
def trmap(config, schema, lang): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
117 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
118 |
return TRANSLATION_MAPS[lang] |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
119 |
except KeyError: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
120 |
assert lang in config.translations, '%s %s' % (lang, config.translations) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
121 |
tr = config.translations[lang] |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
122 |
langmap = {} |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
123 |
for etype in schema.entities(): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
124 |
etype = str(etype) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
125 |
langmap[tr(etype).capitalize()] = etype |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
126 |
langmap[etype.capitalize()] = etype |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
127 |
for rtype in schema.relations(): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
128 |
rtype = str(rtype) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
129 |
langmap.setdefault(tr(rtype).lower(), set()).add(rtype) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
130 |
langmap.setdefault(rtype, set()).add(rtype) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
131 |
TRANSLATION_MAPS[lang] = langmap |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
132 |
return langmap |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
133 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
134 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
135 |
class BaseQueryProcessor(Component): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
136 |
__abstract__ = True |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
137 |
id = 'magicsearch_processor' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
138 |
# set something if you want explicit component search facility for the |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
139 |
# component |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
140 |
name = None |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
141 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
142 |
def process_query(self, uquery, req): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
143 |
args = self.preprocess_query(uquery, req) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
144 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
145 |
return req.execute(*args) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
146 |
finally: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
147 |
# rollback necessary to avoid leaving the connection in a bad state |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
148 |
req.cnx.rollback() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
149 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
150 |
def preprocess_query(self, uquery, req): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
151 |
raise NotImplementedError() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
152 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
153 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
154 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
155 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
156 |
class DoNotPreprocess(BaseQueryProcessor): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
157 |
"""this one returns the raw query and should be placed in first position |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
158 |
of the chain |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
159 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
160 |
name = 'rql' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
161 |
priority = 0 |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
162 |
def preprocess_query(self, uquery, req): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
163 |
return uquery, |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
164 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
165 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
166 |
class QueryTranslator(BaseQueryProcessor): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
167 |
""" parses through rql and translates into schema language entity names |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
168 |
and attributes |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
169 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
170 |
priority = 2 |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
171 |
def preprocess_query(self, uquery, req): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
172 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
173 |
rqlst = parse(uquery, print_errors=False) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
174 |
except (RQLSyntaxError, BadRQLQuery), err: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
175 |
return uquery, |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
176 |
schema = self.vreg.schema |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
177 |
# rql syntax tree will be modified in place if necessary |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
178 |
translate_rql_tree(rqlst, trmap(self.config, schema, req.lang), schema) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
179 |
return rqlst.as_string(), |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
180 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
181 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
182 |
class QSPreProcessor(BaseQueryProcessor): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
183 |
"""Quick search preprocessor |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
184 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
185 |
preprocessing query in shortcut form to their RQL form |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
186 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
187 |
priority = 4 |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
188 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
189 |
def preprocess_query(self, uquery, req): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
190 |
"""""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
191 |
args = None |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
192 |
self.req = req |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
193 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
194 |
# Process as if there was a quoted part |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
195 |
args = self._quoted_words_query(uquery) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
196 |
## No quoted part |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
197 |
except BadRQLQuery: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
198 |
words = uquery.split() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
199 |
if len(words) == 1: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
200 |
args = self._one_word_query(*words) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
201 |
elif len(words) == 2: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
202 |
args = self._two_words_query(*words) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
203 |
elif len(words) == 3: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
204 |
args = self._three_words_query(*words) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
205 |
else: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
206 |
args = self._multiple_words_query(words) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
207 |
return args |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
208 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
209 |
def _get_entity_type(self, word): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
210 |
"""check if the given word is matching an entity type, return it if |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
211 |
it's the case or raise BadRQLQuery if not |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
212 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
213 |
etype = word.capitalize() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
214 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
215 |
return trmap(self.config, self.vreg.schema, self.req.lang)[etype] |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
216 |
except KeyError: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
217 |
raise BadRQLQuery('%s is not a valid entity name' % etype) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
218 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
219 |
def _get_attribute_name(self, word, eschema): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
220 |
"""check if the given word is matching an attribute of the given entity type, |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
221 |
return it normalized if found or return it untransformed else |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
222 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
223 |
"""Returns the attributes's name as stored in the DB""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
224 |
# Need to convert from unicode to string (could be whatever) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
225 |
rtype = word.lower() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
226 |
# Find the entity name as stored in the DB |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
227 |
translations = trmap(self.config, self.vreg.schema, self.req.lang) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
228 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
229 |
translations = translations[rtype] |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
230 |
except KeyError: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
231 |
raise BadRQLQuery('%s is not a valid attribute for %s entity type' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
232 |
% (word, eschema)) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
233 |
rtype = _get_approriate_translation(translations, eschema) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
234 |
if rtype is None: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
235 |
raise BadRQLQuery('%s is not a valid attribute for %s entity type' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
236 |
% (word, eschema)) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
237 |
return rtype |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
238 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
239 |
def _one_word_query(self, word): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
240 |
"""Specific process for one word query (case (1) of preprocess_rql) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
241 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
242 |
# if this is an integer, then directly go to eid |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
243 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
244 |
eid = int(word) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
245 |
return 'Any X WHERE X eid %(x)s', {'x': eid}, 'x' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
246 |
except ValueError: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
247 |
etype = self._get_entity_type(word) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
248 |
return '%s %s' % (etype, etype[0]), |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
249 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
250 |
def _complete_rql(self, searchstr, etype, rtype=None, var=None, searchattr=None): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
251 |
searchop = '' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
252 |
if '%' in searchstr: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
253 |
if rtype: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
254 |
possible_etypes = self.schema.rschema(rtype).objects(etype) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
255 |
else: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
256 |
possible_etypes = [self.schema.eschema(etype)] |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
257 |
if searchattr or len(possible_etypes) == 1: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
258 |
searchattr = searchattr or possible_etypes[0].main_attribute() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
259 |
searchop = 'LIKE ' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
260 |
searchattr = searchattr or 'has_text' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
261 |
if var is None: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
262 |
var = etype[0] |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
263 |
return '%s %s %s%%(text)s' % (var, searchattr, searchop) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
264 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
265 |
def _two_words_query(self, word1, word2): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
266 |
"""Specific process for two words query (case (2) of preprocess_rql) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
267 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
268 |
etype = self._get_entity_type(word1) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
269 |
# this is a valid RQL query : ("Person X", or "Person TMP1") |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
270 |
if len(word2) == 1 and word2.isupper(): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
271 |
return '%s %s' % (etype, word2), |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
272 |
# else, suppose it's a shortcut like : Person Smith |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
273 |
rql = '%s %s WHERE %s' % (etype, etype[0], self._complete_rql(word2, etype)) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
274 |
return rql, {'text': word2} |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
275 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
276 |
def _three_words_query(self, word1, word2, word3): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
277 |
"""Specific process for three words query (case (3) of preprocess_rql) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
278 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
279 |
etype = self._get_entity_type(word1) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
280 |
eschema = self.schema.eschema(etype) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
281 |
rtype = self._get_attribute_name(word2, eschema) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
282 |
# expand shortcut if rtype is a non final relation |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
283 |
if not self.schema.rschema(rtype).is_final(): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
284 |
return self._expand_shortcut(etype, rtype, word3) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
285 |
if '%' in word3: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
286 |
searchop = 'LIKE ' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
287 |
else: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
288 |
searchop = '' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
289 |
rql = '%s %s WHERE %s' % (etype, etype[0], |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
290 |
self._complete_rql(word3, etype, searchattr=rtype)) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
291 |
return rql, {'text': word3} |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
292 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
293 |
def _multiple_words_query(self, words): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
294 |
"""specific process for more than 3 words query""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
295 |
return ' '.join(words), |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
296 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
297 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
298 |
def _expand_shortcut(self, etype, rtype, searchstr): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
299 |
"""Expands shortcut queries on a non final relation to use has_text or |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
300 |
the main attribute (according to possible entity type) if '%' is used in the |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
301 |
search word |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
302 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
303 |
Transforms : 'person worksat IBM' into |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
304 |
'Personne P WHERE P worksAt C, C has_text "IBM"' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
305 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
306 |
# check out all possilbe entity types for the relation represented |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
307 |
# by 'rtype' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
308 |
mainvar = etype[0] |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
309 |
searchvar = mainvar + '1' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
310 |
rql = '%s %s WHERE %s %s %s, %s' % (etype, mainvar, # Person P |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
311 |
mainvar, rtype, searchvar, # P worksAt C |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
312 |
self._complete_rql(searchstr, etype, |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
313 |
rtype=rtype, var=searchvar)) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
314 |
return rql, {'text': searchstr} |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
315 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
316 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
317 |
def _quoted_words_query(self, ori_rql): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
318 |
"""Specific process when there's a "quoted" part |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
319 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
320 |
m = QUOTED_SRE.match(ori_rql) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
321 |
# if there's no quoted part, then no special pre-processing to do |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
322 |
if m is None: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
323 |
raise BadRQLQuery("unable to handle request %r" % ori_rql) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
324 |
left_words = m.group(1).split() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
325 |
quoted_part = m.group(3) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
326 |
# Case (1) : Company "My own company" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
327 |
if len(left_words) == 1: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
328 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
329 |
word1 = left_words[0] |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
330 |
return self._two_words_query(word1, quoted_part) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
331 |
except BadRQLQuery, error: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
332 |
raise BadRQLQuery("unable to handle request %r" % ori_rql) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
333 |
# Case (2) : Company name "My own company"; |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
334 |
elif len(left_words) == 2: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
335 |
word1, word2 = left_words |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
336 |
return self._three_words_query(word1, word2, quoted_part) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
337 |
# return ori_rql |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
338 |
raise BadRQLQuery("unable to handle request %r" % ori_rql) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
339 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
340 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
341 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
342 |
class FullTextTranslator(BaseQueryProcessor): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
343 |
priority = 10 |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
344 |
name = 'text' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
345 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
346 |
def preprocess_query(self, uquery, req): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
347 |
"""suppose it's a plain text query""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
348 |
return 'Any X WHERE X has_text %(text)s', {'text': uquery} |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
349 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
350 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
351 |
|
661
|
352 |
class MagicSearchComponent(Component): |
0
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
353 |
id = 'magicsearch' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
354 |
def __init__(self, req, rset=None): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
355 |
super(MagicSearchComponent, self).__init__(req, rset) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
356 |
processors = [] |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
357 |
self.by_name = {} |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
358 |
for processorcls in self.vreg.registry_objects('components', |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
359 |
'magicsearch_processor'): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
360 |
# instantiation needed |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
361 |
processor = processorcls() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
362 |
processors.append(processor) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
363 |
if processor.name is not None: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
364 |
assert not processor.name in self.by_name |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
365 |
self.by_name[processor.name.lower()] = processor |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
366 |
self.processors = sorted(processors, key=lambda x: x.priority) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
367 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
368 |
def process_query(self, uquery, req): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
369 |
assert isinstance(uquery, unicode) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
370 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
371 |
procname, query = uquery.split(':', 1) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
372 |
proc = self.by_name[procname.strip().lower()] |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
373 |
uquery = query.strip() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
374 |
except: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
375 |
# use processor chain |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
376 |
unauthorized = None |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
377 |
for proc in self.processors: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
378 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
379 |
return proc.process_query(uquery, req) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
380 |
# FIXME : we don't want to catch any exception type here ! |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
381 |
except (RQLSyntaxError, BadRQLQuery): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
382 |
pass |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
383 |
except Unauthorized, ex: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
384 |
unauthorized = ex |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
385 |
continue |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
386 |
except Exception, ex: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
387 |
LOGGER.debug('%s: %s', ex.__class__.__name__, ex) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
388 |
continue |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
389 |
if unauthorized: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
390 |
raise unauthorized |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
391 |
else: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
392 |
# let exception propagate |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
393 |
return proc.process_query(uquery, req) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
394 |
raise BadRQLQuery(req._('sorry, the server is unable to handle this query')) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
395 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
396 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
397 |
# Do not make a strong dependency on NlpTools |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
398 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
399 |
from NlpTools.rqltools.client import RQLClient |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
400 |
except ImportError: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
401 |
LOGGER.info('could not import RQLClient (NlpTools)') |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
402 |
else: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
403 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
404 |
from Pyro.errors import NamingError |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
405 |
except ImportError: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
406 |
LOGGER.warning("pyro is not installed, can't try to connect to nlp server") |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
407 |
else: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
408 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
409 |
class NLPProcessor(BaseQueryProcessor): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
410 |
priority = 8 |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
411 |
nlp_agent = RQLClient('ivan') |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
412 |
def preprocess_query(self, uquery, req): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
413 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
414 |
answer = self.nlp_agent.get_translation(uquery) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
415 |
if not answer: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
416 |
raise BadRQLQuery(uquery) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
417 |
return answer or uquery, |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
418 |
except Exception, ex: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
419 |
LOGGER.exception(str(ex)) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
420 |
return uquery, |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
421 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
422 |
except NamingError: # NlpTools available but no server registered |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
423 |
LOGGER.warning('could not find any RQLServer object named "ivan"') |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
424 |
|