author | Sylvain Thénault <sylvain.thenault@logilab.fr> |
Thu, 21 Apr 2011 12:35:41 +0200 | |
branch | stable |
changeset 7342 | d1c8b5b3531c |
parent 5768 | 1e73a466aa69 |
child 7815 | 2a164a9cf81c |
permissions | -rw-r--r-- |
5421
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
1 |
# copyright 2003-2010 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
2 |
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
3 |
# |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
4 |
# This file is part of CubicWeb. |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
5 |
# |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
6 |
# CubicWeb is free software: you can redistribute it and/or modify it under the |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
7 |
# terms of the GNU Lesser General Public License as published by the Free |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
8 |
# Software Foundation, either version 2.1 of the License, or (at your option) |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
9 |
# any later version. |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
10 |
# |
5424
8ecbcbff9777
replace logilab-common by CubicWeb in disclaimer
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5421
diff
changeset
|
11 |
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT |
5421
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
12 |
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
13 |
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
14 |
# details. |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
15 |
# |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
16 |
# You should have received a copy of the GNU Lesser General Public License along |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
17 |
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
5768
1e73a466aa69
[fti] support for fti ranking: has_text query results sorted by relevance, and provides a way to control weight per entity / entity's attribute
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5424
diff
changeset
|
18 |
"""a query processor to handle quick search shortcuts for cubicweb""" |
0 | 19 |
|
20 |
__docformat__ = "restructuredtext en" |
|
21 |
||
22 |
import re |
|
23 |
from logging import getLogger |
|
3469
1e28876c4b55
[magicsearch] (pre_)process_query doesn't need the req argument, instances already have access to self._cw
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
3462
diff
changeset
|
24 |
from warnings import warn |
0 | 25 |
|
26 |
from rql import RQLSyntaxError, BadRQLQuery, parse |
|
27 |
from rql.nodes import Relation |
|
28 |
||
3377
dd9d292b6a6d
use __regid__ instead of id on appobject classes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
3369
diff
changeset
|
29 |
from cubicweb import Unauthorized, typed_eid |
984 | 30 |
from cubicweb.view import Component |
0 | 31 |
|
32 |
LOGGER = getLogger('cubicweb.magicsearch') |
|
33 |
||
34 |
def _get_approriate_translation(translations_found, eschema):
    """Return the candidate translation that fits the given entity type.

    :type translations_found: set
    :param translations_found: candidate relation type names

    :param eschema: the entity schema; only its `subject_relations()` method
      is used here

    :return: a candidate which is also a subject relation of `eschema`, or
      None when no candidate matches. There should be only one match; if
      there are several, an arbitrary one is returned.
    """
    # keep only the candidates which exist as attribute / relation for this
    # kind of entity
    consistent_translations = translations_found & set(eschema.subject_relations())
    if not consistent_translations:
        return None
    return consistent_translations.pop()
|
44 |
||
45 |
||
46 |
def translate_rql_tree(rqlst, translations, schema):
    """Try to translate each relation in the RQL syntax tree

    The tree is modified in place; nothing is returned.

    :type rqlst: `rql.stmts.Statement`
    :param rqlst: the RQL syntax tree

    :type translations: dict
    :param translations: the reverted l10n dict

    :type schema: `cubicweb.schema.Schema`
    :param schema: the instance's schema
    """
    # vartypes is used as a map : var_name / var_type
    vartypes = {}
    # ambiguous_nodes is used as a map : relation_node / (var_name, available_translations)
    ambiguous_nodes = {}
    # For each relation node, check if it's a localized relation name
    # If it's a localized name, then use the original relation name, else
    # keep the existing relation name
    for relation in rqlst.get_nodes(Relation):
        rtype = relation.r_type
        lhs, rhs = relation.get_variable_parts()
        if rtype == 'is':
            try:
                etype = translations[rhs.value]
                rhs.value = etype
            except KeyError:
                # If no translation found, leave the entity type as is
                etype = rhs.value
            # Memorize variable's type
            vartypes[lhs.name] = etype
        else:
            try:
                translation_set = translations[rtype]
            except KeyError:
                pass # If no translation found, leave the relation type as is
            else:
                # Only one possible translation, no ambiguity
                if len(translation_set) == 1:
                    # next(iter(...)) rather than iter(...).next(): same
                    # result, but not tied to the python 2 iterator protocol
                    relation.r_type = next(iter(translation_set))
                # More than 1 possible translation => resolve it later
                else:
                    ambiguous_nodes[relation] = (lhs.name, translation_set)
    if ambiguous_nodes:
        resolve_ambiguities(vartypes, ambiguous_nodes, schema)
|
91 |
||
92 |
||
93 |
def resolve_ambiguities(var_types, ambiguous_nodes, schema):
    r"""Tries to resolve remaining ambiguities for translation
    /!\ An ambiguity is when two different string can be localized with
        the same string
    A simple example:
      - 'name' in a company context will be localized as 'nom' in French
      - but ... 'surname' will also be localized as 'nom'

    Relation nodes are updated in place. A node is left untouched when the
    type of its variable is unknown or when no candidate fits that type.

    :type var_types: dict
    :param var_types: a map : var_name / var_type

    :type ambiguous_nodes: dict
    :param ambiguous_nodes: a map : relation_node / (var_name, available_translations)

    :type schema: `cubicweb.schema.Schema`
    :param schema: the instance's schema
    """
    # Now, try to resolve ambiguous translations
    for relation, (var_name, translations_found) in ambiguous_nodes.items():
        try:
            vartype = var_types[var_name]
        except KeyError:
            # no 'is' relation seen for this variable: can't decide
            continue
        # Get schema for this entity type
        eschema = schema.eschema(vartype)
        rtype = _get_approriate_translation(translations_found, eschema)
        if rtype is None:
            continue
        relation.r_type = rtype
|
1433 | 122 |
|
0 | 123 |
|
124 |
||
125 |
# group 1: the (possibly empty) text before the first quote, group 2: the
# quote character, group 3: the non empty text up to the matching quote
QUOTED_SRE = re.compile(r'(.*?)(["\'])(.+?)\2')

# cache of the maps built by `trmap`, keyed by language only
TRANSLATION_MAPS = {}

def trmap(config, schema, lang):
    """Return the reverted l10n map for `lang`, building it on first use.

    The map contains:

    * for each entity type, its capitalized translation and its capitalized
      name, both mapped to the entity type name
    * for each relation type, its lower-cased translation and its name, both
      mapped to a set of relation type names (several relation types may
      share the same translation)

    :param config: object whose `translations` maps a language to a couple
      of functions, the first one being used to translate names
    :param schema: object providing `entities()` and `relations()`
    :param lang: the language code

    The result is cached per language: once a map exists for `lang`, it is
    returned without looking at `config` nor `schema`.
    """
    cached = TRANSLATION_MAPS.get(lang)
    if cached is not None:
        return cached
    assert lang in config.translations, '%s %s' % (lang, config.translations)
    translate, _ctx_translate = config.translations[lang]
    langmap = {}
    for eschema in schema.entities():
        etype = str(eschema)
        langmap[translate(etype).capitalize()] = etype
        langmap[etype.capitalize()] = etype
    for rschema in schema.relations():
        rtype = str(rschema)
        for key in (translate(rtype).lower(), rtype):
            langmap.setdefault(key, set()).add(rtype)
    TRANSLATION_MAPS[lang] = langmap
    return langmap
|
145 |
||
146 |
||
147 |
class BaseQueryProcessor(Component):
    """Abstract base class of the magic search processors.

    Concrete processors have to implement `preprocess_query`.
    """
    __abstract__ = True
    __regid__ = 'magicsearch_processor'
    # set something if you want explicit component search facility for the
    # component
    name = None

    def process_query(self, uquery):
        """Preprocess `uquery`, execute the result and return what
        `self._cw.execute` returned.

        The connection is rolled back whether the execution succeeded or not.
        """
        execute_args = self.preprocess_query(uquery)
        try:
            rset = self._cw.execute(*execute_args)
        finally:
            # rollback necessary to avoid leaving the connection in a bad state
            self._cw.cnx.rollback()
        return rset

    def preprocess_query(self, uquery):
        """Return the tuple of arguments to give to `self._cw.execute`.

        Must be overridden: this implementation raises NotImplementedError.
        """
        raise NotImplementedError()
164 |
||
165 |
||
166 |
||
167 |
||
168 |
class DoNotPreprocess(BaseQueryProcessor):
    """this one returns the raw query and should be placed in first position
    of the chain
    """
    name = 'rql'
    priority = 0

    def preprocess_query(self, uquery):
        """Return the query untouched, as a one element tuple."""
        return (uquery,)
1433 | 176 |
|
0 | 177 |
|
178 |
class QueryTranslator(BaseQueryProcessor):
    """ parses through rql and translates into schema language entity names
    and attributes
    """
    priority = 2

    def preprocess_query(self, uquery):
        """Parse `uquery`, translate localized names found in the syntax tree
        and return the resulting RQL string as a one element tuple.

        Errors raised by `parse` are not caught here.
        """
        tree = parse(uquery, print_errors=False)
        vreg = self._cw.vreg
        schema = vreg.schema
        translations = trmap(vreg.config, schema, self._cw.lang)
        # rql syntax tree will be modified in place if necessary
        translate_rql_tree(tree, translations, schema)
        return (tree.as_string(),)
190 |
||
191 |
||
192 |
class QSPreProcessor(BaseQueryProcessor):
    """Quick search preprocessor

    preprocessing query in shortcut form to their RQL form
    """
    priority = 4

    def preprocess_query(self, uquery):
        """try to get rql from an unicode query string

        The query is first handled as if it had a quoted part; if that fails,
        it is split into words and handled according to the number of words
        (one, two or three). Any other number of words re-raises the
        BadRQLQuery error.
        """
        try:
            # Process as if there was a quoted part
            args = self._quoted_words_query(uquery)
        ## No quoted part
        except BadRQLQuery:
            words = uquery.split()
            if len(words) == 1:
                args = self._one_word_query(*words)
            elif len(words) == 2:
                args = self._two_words_query(*words)
            elif len(words) == 3:
                args = self._three_words_query(*words)
            else:
                raise
        return args

    def _get_entity_type(self, word):
        """check if the given word is matching an entity type, return it if
        it's the case or raise BadRQLQuery if not
        """
        etype = word.capitalize()
        try:
            return trmap(self._cw.vreg.config, self._cw.vreg.schema, self._cw.lang)[etype]
        except KeyError:
            raise BadRQLQuery('%s is not a valid entity name' % etype)

    def _get_attribute_name(self, word, eschema):
        """check if the given word is matching an attribute of the given
        entity type, return the matching relation type name if found or
        raise BadRQLQuery if not
        """
        # keys for relation types are lower-cased in the translation map
        rtype = word.lower()
        # Find the possible relation type names for this word
        translations = trmap(self._cw.vreg.config, self._cw.vreg.schema, self._cw.lang)
        try:
            translations = translations[rtype]
        except KeyError:
            raise BadRQLQuery('%s is not a valid attribute for %s entity type'
                              % (word, eschema))
        rtype = _get_approriate_translation(translations, eschema)
        if rtype is None:
            raise BadRQLQuery('%s is not a valid attribute for %s entity type'
                              % (word, eschema))
        return rtype

    def _one_word_query(self, word):
        """Specific process for one word query (case (1) of preprocess_rql)
        """
        # if this is an integer, then directly go to eid
        try:
            eid = typed_eid(word)
        except ValueError:
            etype = self._get_entity_type(word)
            return '%s %s' % (etype, etype[0]),
        return 'Any X WHERE X eid %(x)s', {'x': eid}, 'x'

    def _complete_rql(self, searchstr, etype, rtype=None, var=None, searchattr=None):
        """Return the restriction '<var> <attribute> [LIKE ]%(text)s'.

        When `searchstr` contains a '%', the LIKE operator is used, provided
        that `searchattr` is given or that there is a single possible entity
        type (whose main attribute is then used). The attribute defaults to
        'has_text' and the variable to the first letter of `etype`.
        """
        searchop = ''
        if '%' in searchstr:
            if rtype:
                possible_etypes = self._cw.vreg.schema.rschema(rtype).objects(etype)
            else:
                possible_etypes = [self._cw.vreg.schema.eschema(etype)]
            if searchattr or len(possible_etypes) == 1:
                searchattr = searchattr or possible_etypes[0].main_attribute()
                searchop = 'LIKE '
        searchattr = searchattr or 'has_text'
        if var is None:
            var = etype[0]
        return '%s %s %s%%(text)s' % (var, searchattr, searchop)

    def _two_words_query(self, word1, word2):
        """Specific process for two words query (case (2) of preprocess_rql)
        """
        etype = self._get_entity_type(word1)
        # this is a valid RQL query : "Person X" (only a single upper-case
        # letter is recognized as a variable name here)
        if len(word2) == 1 and word2.isupper():
            return '%s %s' % (etype, word2),
        # else, suppose it's a shortcut like : Person Smith
        restriction = self._complete_rql(word2, etype)
        if ' has_text ' in restriction:
            rql = '%s %s ORDERBY FTIRANK(%s) DESC WHERE %s' % (
                etype, etype[0], etype[0], restriction)
        else:
            rql = '%s %s WHERE %s' % (
                etype, etype[0], restriction)
        return rql, {'text': word2}

    def _three_words_query(self, word1, word2, word3):
        """Specific process for three words query (case (3) of preprocess_rql)
        """
        etype = self._get_entity_type(word1)
        eschema = self._cw.vreg.schema.eschema(etype)
        rtype = self._get_attribute_name(word2, eschema)
        # expand shortcut if rtype is a non final relation
        if not self._cw.vreg.schema.rschema(rtype).final:
            return self._expand_shortcut(etype, rtype, word3)
        # the search operator (LIKE or nothing) is chosen by _complete_rql
        rql = '%s %s WHERE %s' % (etype, etype[0],
                                  self._complete_rql(word3, etype, searchattr=rtype))
        return rql, {'text': word3}

    def _expand_shortcut(self, etype, rtype, searchstr):
        """Expands shortcut queries on a non final relation to use has_text or
        the main attribute (according to possible entity type) if '%' is used in the
        search word

        Transforms : 'person worksat IBM' into
        'Personne P WHERE P worksAt C, C has_text "IBM"'
        """
        mainvar = etype[0]
        searchvar = mainvar + '1'
        # possible entity types for the relation represented by 'rtype' are
        # checked by _complete_rql
        restriction = self._complete_rql(searchstr, etype, rtype=rtype,
                                         var=searchvar)
        if ' has_text ' in restriction:
            rql = ('%s %s ORDERBY FTIRANK(%s) DESC '
                   'WHERE %s %s %s, %s' % (etype, mainvar, searchvar,
                                           mainvar, rtype, searchvar, # P worksAt C
                                           restriction))
        else:
            rql = ('%s %s WHERE %s %s %s, %s' % (etype, mainvar,
                                                 mainvar, rtype, searchvar, # P worksAt C
                                                 restriction))
        return rql, {'text': searchstr}

    def _quoted_words_query(self, ori_rql):
        """Specific process when there's a "quoted" part

        Raise BadRQLQuery when there is no quoted part or when the number of
        words before it is neither one nor two.
        """
        m = QUOTED_SRE.match(ori_rql)
        # if there's no quoted part, then no special pre-processing to do
        if m is None:
            raise BadRQLQuery("unable to handle request %r" % ori_rql)
        left_words = m.group(1).split()
        quoted_part = m.group(3)
        # Case (1) : Company "My own company"
        if len(left_words) == 1:
            try:
                word1 = left_words[0]
                return self._two_words_query(word1, quoted_part)
            except BadRQLQuery:
                # report the failure on the whole original request
                raise BadRQLQuery("unable to handle request %r" % ori_rql)
        # Case (2) : Company name "My own company";
        elif len(left_words) == 2:
            word1, word2 = left_words
            return self._three_words_query(word1, word2, quoted_part)
            # return ori_rql
        raise BadRQLQuery("unable to handle request %r" % ori_rql)
|
1433 | 356 |
|
0 | 357 |
|
1433 | 358 |
|
0 | 359 |
class FullTextTranslator(BaseQueryProcessor):
    """Processor handling the whole query as a full text search."""
    priority = 10
    name = 'text'

    def preprocess_query(self, uquery):
        """suppose it's a plain text query"""
        rql = 'Any X ORDERBY FTIRANK(X) DESC WHERE X has_text %(text)s'
        params = {'text': uquery}
        return rql, params
0 | 366 |
|
367 |
||
368 |
||
661
4f61eb8a96b7
properly kill/depreciate component base class, only keep Component
sylvain.thenault@logilab.fr
parents:
0
diff
changeset
|
369 |
class MagicSearchComponent(Component):
    """Component running a query through the registered
    'magicsearch_processor' components.
    """
    __regid__ = 'magicsearch'

    def __init__(self, req, rset=None):
        """Instantiate every registered processor.

        Sets `self.processors` (processors sorted by `priority`) and
        `self.by_name` (lower-cased name / processor, for named processors).
        """
        super(MagicSearchComponent, self).__init__(req, rset=rset)
        processors = []
        self.by_name = {}
        for processorcls in self._cw.vreg['components']['magicsearch_processor']:
            # instantiation needed
            processor = processorcls(self._cw)
            processors.append(processor)
            if processor.name is not None:
                # check the key which is actually stored, i.e. the lower-cased
                # name, else names differing only by case silently overwrite
                # each other
                procname = processor.name.lower()
                assert procname not in self.by_name
                self.by_name[procname] = processor
        self.processors = sorted(processors, key=lambda x: x.priority)

    def process_query(self, uquery):
        """Process `uquery` and return the result of the processor handling it.

        If the query looks like '<name>: <query>' where <name> is the name of
        a known processor, that processor is used and its errors are not
        caught. Else each processor is tried by priority order until one
        succeeds.

        :raise Unauthorized: if no processor succeeded and one of them raised
          it (the last one raised is re-raised)
        :raise BadRQLQuery: if no processor succeeded
        """
        assert isinstance(uquery, unicode)
        try:
            procname, query = uquery.split(':', 1)
            proc = self.by_name[procname.strip().lower()]
            uquery = query.strip()
        # ValueError: no ':' in the query, KeyError: unknown processor name
        except (ValueError, KeyError):
            # use processor chain
            unauthorized = None
            for proc in self.processors:
                try:
                    try:
                        return proc.process_query(uquery)
                    except TypeError: # cw 3.5 compat
                        warn("[3.6] %s.%s.process_query() should now accept uquery "
                             "as unique argument, use self._cw instead of req"
                             % (proc.__module__, proc.__class__.__name__),
                             DeprecationWarning)
                        return proc.process_query(uquery, self._cw)
                # FIXME : we don't want to catch any exception type here !
                except (RQLSyntaxError, BadRQLQuery):
                    pass
                except Unauthorized as ex:
                    unauthorized = ex
                    continue
                except Exception as ex:
                    LOGGER.debug('%s: %s', ex.__class__.__name__, ex)
                    continue
            if unauthorized:
                raise unauthorized
        else:
            # explicitly specified processor: don't try to catch the exception
            return proc.process_query(uquery)
        raise BadRQLQuery(self._cw._('sorry, the server is unable to handle this query'))