1 """a query preprocessor to handle quick search shortcuts for cubicweb |
1 """a query preprocessor to handle quick search shortcuts for cubicweb |
2 |
2 |
3 |
3 |
4 :organization: Logilab |
4 :organization: Logilab |
5 :copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
5 :copyright: 2001-2009 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
6 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr |
6 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr |
7 """ |
7 """ |
8 |
8 |
9 __docformat__ = "restructuredtext en" |
9 __docformat__ = "restructuredtext en" |
10 |
10 |
13 |
13 |
14 from rql import RQLSyntaxError, BadRQLQuery, parse |
14 from rql import RQLSyntaxError, BadRQLQuery, parse |
15 from rql.nodes import Relation |
15 from rql.nodes import Relation |
16 |
16 |
17 from cubicweb import Unauthorized |
17 from cubicweb import Unauthorized |
18 from cubicweb.common.appobject import Component, SingletonComponent |
18 from cubicweb.view import Component |
19 |
19 |
20 LOGGER = getLogger('cubicweb.magicsearch') |
20 LOGGER = getLogger('cubicweb.magicsearch') |
21 |
21 |
22 def _get_approriate_translation(translations_found, eschema): |
22 def _get_approriate_translation(translations_found, eschema): |
23 """return the first (should be the only one) possible translation according |
23 """return the first (should be the only one) possible translation according |
39 |
39 |
40 :type translations: dict |
40 :type translations: dict |
41 :param translations: the reverted l10n dict |
41 :param translations: the reverted l10n dict |
42 |
42 |
43 :type schema: `cubicweb.schema.Schema` |
43 :type schema: `cubicweb.schema.Schema` |
44 :param schema: the application's schema |
44 :param schema: the application's schema |
45 """ |
45 """ |
46 # var_types is used as a map : var_name / var_type |
46 # var_types is used as a map : var_name / var_type |
47 vartypes = {} |
47 vartypes = {} |
48 # ambiguous_nodes is used as a map : relation_node / (var_name, available_translations) |
48 # ambiguous_nodes is used as a map : relation_node / (var_name, available_translations) |
49 ambiguous_nodes = {} |
49 ambiguous_nodes = {} |
105 eschema = schema.eschema(vartype) |
105 eschema = schema.eschema(vartype) |
106 rtype = _get_approriate_translation(translations_found, eschema) |
106 rtype = _get_approriate_translation(translations_found, eschema) |
107 if rtype is None: |
107 if rtype is None: |
108 continue |
108 continue |
109 relation.r_type = rtype |
109 relation.r_type = rtype |
110 |
110 |
111 |
111 |
112 |
112 |
# Matches an unquoted prefix followed by a quoted part:
#   group 1: the text before the opening quote (non-greedy, may be empty)
#   group 2: the quote character itself (single or double quote)
#   group 3: the quoted text (at least one character), which must be
#            closed by the same quote character as group 2
QUOTED_SRE = re.compile(r'(.*?)(["\'])(.+?)\2')

# Module-level cache, empty at import time.
# NOTE(review): presumably filled by `trmap` with per-language translation
# maps -- confirm against the `trmap` definition, which is not visible here.
TRANSLATION_MAPS = dict()
143 args = self.preprocess_query(uquery, req) |
143 args = self.preprocess_query(uquery, req) |
144 try: |
144 try: |
145 return req.execute(*args) |
145 return req.execute(*args) |
146 finally: |
146 finally: |
147 # rollback necessary to avoid leaving the connection in a bad state |
147 # rollback necessary to avoid leaving the connection in a bad state |
148 req.cnx.rollback() |
148 req.cnx.rollback() |
149 |
149 |
def preprocess_query(self, uquery, req):
    """Turn the user's query string into the arguments for `req.execute`.

    Abstract hook: this implementation always raises NotImplementedError.
    Overriding implementations in this file return a tuple whose first
    item is an RQL string, optionally followed by a dict of query
    arguments.

    :param uquery: the query string typed by the user
    :param req: the current request
    :raise NotImplementedError: always
    """
    raise NotImplementedError()
152 |
152 |
153 |
153 |
159 """ |
159 """ |
160 name = 'rql' |
160 name = 'rql' |
161 priority = 0 |
161 priority = 0 |
def preprocess_query(self, uquery, req):
    """Return the query untouched, wrapped in a one-element tuple.

    :param uquery: the query string typed by the user
    :param req: the current request (unused here)
    :return: the 1-tuple `(uquery,)`
    """
    return (uquery,)
164 |
164 |
165 |
165 |
166 class QueryTranslator(BaseQueryProcessor): |
166 class QueryTranslator(BaseQueryProcessor): |
167 """ parses through rql and translates into schema language entity names |
167 """ parses through rql and translates into schema language entity names |
168 and attributes |
168 and attributes |
169 """ |
169 """ |
170 priority = 2 |
170 priority = 2 |
171 def preprocess_query(self, uquery, req): |
171 def preprocess_query(self, uquery, req): |
172 try: |
172 try: |
183 """Quick search preprocessor |
183 """Quick search preprocessor |
184 |
184 |
185 preprocessing query in shortcut form to their RQL form |
185 preprocessing query in shortcut form to their RQL form |
186 """ |
186 """ |
187 priority = 4 |
187 priority = 4 |
188 |
188 |
189 def preprocess_query(self, uquery, req): |
189 def preprocess_query(self, uquery, req): |
190 """""" |
190 """try to get rql from an unicode query string""" |
191 args = None |
191 args = None |
192 self.req = req |
192 self.req = req |
193 try: |
193 try: |
194 # Process as if there was a quoted part |
194 # Process as if there was a quoted part |
195 args = self._quoted_words_query(uquery) |
195 args = self._quoted_words_query(uquery) |
196 ## No quoted part |
196 ## No quoted part |
197 except BadRQLQuery: |
197 except BadRQLQuery: |
198 words = uquery.split() |
198 words = uquery.split() |
199 if len(words) == 1: |
199 if len(words) == 1: |
200 args = self._one_word_query(*words) |
200 args = self._one_word_query(*words) |
201 elif len(words) == 2: |
201 elif len(words) == 2: |
203 elif len(words) == 3: |
203 elif len(words) == 3: |
204 args = self._three_words_query(*words) |
204 args = self._three_words_query(*words) |
205 else: |
205 else: |
206 args = self._multiple_words_query(words) |
206 args = self._multiple_words_query(words) |
207 return args |
207 return args |
208 |
208 |
def _get_entity_type(self, word):
    """Resolve a user-typed word to an entity type.

    The word is capitalized, then looked up in the mapping returned by
    `trmap` for this processor's config, schema and the request language.

    :param word: a single word taken from the user's query
    :return: the value stored in the translation map for that word
    :raise BadRQLQuery: if the lookup raises KeyError
    """
    candidate = word.capitalize()
    try:
        # kept inside the `try` so a KeyError raised while building the
        # map is reported the same way as a missing entry
        translations = trmap(self.config, self.vreg.schema, self.req.lang)
        return translations[candidate]
    except KeyError:
        raise BadRQLQuery('%s is not a valid entity name' % candidate)
218 |
218 |
219 def _get_attribute_name(self, word, eschema): |
219 def _get_attribute_name(self, word, eschema): |
220 """check if the given word is matching an attribute of the given entity type, |
220 """check if the given word is matching an attribute of the given entity type, |
221 return it normalized if found or return it untransformed else |
221 return it normalized if found or return it untransformed else |
222 """ |
222 """ |
259 searchop = 'LIKE ' |
259 searchop = 'LIKE ' |
260 searchattr = searchattr or 'has_text' |
260 searchattr = searchattr or 'has_text' |
261 if var is None: |
261 if var is None: |
262 var = etype[0] |
262 var = etype[0] |
263 return '%s %s %s%%(text)s' % (var, searchattr, searchop) |
263 return '%s %s %s%%(text)s' % (var, searchattr, searchop) |
264 |
264 |
265 def _two_words_query(self, word1, word2): |
265 def _two_words_query(self, word1, word2): |
266 """Specific process for two words query (case (2) of preprocess_rql) |
266 """Specific process for two words query (case (2) of preprocess_rql) |
267 """ |
267 """ |
268 etype = self._get_entity_type(word1) |
268 etype = self._get_entity_type(word1) |
269 # this is a valid RQL query : ("Person X", or "Person TMP1") |
269 # this is a valid RQL query : ("Person X", or "Person TMP1") |
270 if len(word2) == 1 and word2.isupper(): |
270 if len(word2) == 1 and word2.isupper(): |
271 return '%s %s' % (etype, word2), |
271 return '%s %s' % (etype, word2), |
272 # else, suppose it's a shortcut like : Person Smith |
272 # else, suppose it's a shortcut like : Person Smith |
273 rql = '%s %s WHERE %s' % (etype, etype[0], self._complete_rql(word2, etype)) |
273 rql = '%s %s WHERE %s' % (etype, etype[0], self._complete_rql(word2, etype)) |
274 return rql, {'text': word2} |
274 return rql, {'text': word2} |
275 |
275 |
276 def _three_words_query(self, word1, word2, word3): |
276 def _three_words_query(self, word1, word2, word3): |
277 """Specific process for three words query (case (3) of preprocess_rql) |
277 """Specific process for three words query (case (3) of preprocess_rql) |
278 """ |
278 """ |
279 etype = self._get_entity_type(word1) |
279 etype = self._get_entity_type(word1) |
280 eschema = self.schema.eschema(etype) |
280 eschema = self.schema.eschema(etype) |
334 elif len(left_words) == 2: |
334 elif len(left_words) == 2: |
335 word1, word2 = left_words |
335 word1, word2 = left_words |
336 return self._three_words_query(word1, word2, quoted_part) |
336 return self._three_words_query(word1, word2, quoted_part) |
337 # return ori_rql |
337 # return ori_rql |
338 raise BadRQLQuery("unable to handle request %r" % ori_rql) |
338 raise BadRQLQuery("unable to handle request %r" % ori_rql) |
339 |
339 |
340 |
340 |
341 |
341 |
class FullTextTranslator(BaseQueryProcessor):
    """Query processor that treats the whole user input as plain text and
    searches it through the `has_text` relation.
    """
    name = 'text'
    priority = 10

    def preprocess_query(self, uquery, req):
        """Build a `has_text` query for the raw user input.

        :param uquery: the query string typed by the user
        :param req: the current request (unused here)
        :return: the 2-tuple `(rql, args)`, with the user input passed as
          the `text` query argument rather than inlined in the RQL
        """
        rql = 'Any X WHERE X has_text %(text)s'
        query_args = {'text': uquery}
        return rql, query_args
349 |
349 |
350 |
350 |
351 |
351 |
352 class MagicSearchComponent(SingletonComponent): |
352 class MagicSearchComponent(Component): |
353 id = 'magicsearch' |
353 id = 'magicsearch' |
354 def __init__(self, req, rset=None): |
354 def __init__(self, req, rset=None): |
355 super(MagicSearchComponent, self).__init__(req, rset) |
355 super(MagicSearchComponent, self).__init__(req, rset) |
356 processors = [] |
356 processors = [] |
357 self.by_name = {} |
357 self.by_name = {} |
390 raise unauthorized |
390 raise unauthorized |
391 else: |
391 else: |
392 # let exception propagate |
392 # let exception propagate |
393 return proc.process_query(uquery, req) |
393 return proc.process_query(uquery, req) |
394 raise BadRQLQuery(req._('sorry, the server is unable to handle this query')) |
394 raise BadRQLQuery(req._('sorry, the server is unable to handle this query')) |
395 |
|
396 |
|
# Do not make a strong dependency on NlpTools: NLPProcessor is only defined
# when NlpTools and Pyro can both be imported and the RQLClient can be
# created; every failure is logged and otherwise ignored.
try:
    from NlpTools.rqltools.client import RQLClient
except ImportError:
    LOGGER.info('could not import RQLClient (NlpTools)')
else:
    try:
        from Pyro.errors import NamingError
    except ImportError:
        LOGGER.warning("pyro is not installed, can't try to connect to nlp server")
    else:
        try:
            class NLPProcessor(BaseQueryProcessor):
                """Query processor asking the NlpTools RQL client to
                translate the user's query.
                """
                priority = 8
                # created once, while the class body is executed; a
                # NamingError raised here is handled by the enclosing `try`
                nlp_agent = RQLClient('ivan')

                def preprocess_query(self, uquery, req):
                    """Return the translation given by the NLP agent.

                    :param uquery: the query string typed by the user
                    :param req: the current request (unused here)
                    :return: the 1-tuple `(answer,)` on success; on any
                      error, or when the agent gives no answer, the
                      exception is logged and `(uquery,)` is returned
                    """
                    try:
                        answer = self.nlp_agent.get_translation(uquery)
                        if not answer:
                            # NOTE(review): this BadRQLQuery is caught by
                            # the blanket handler below, so it is logged
                            # and never reaches the caller -- confirm
                            # whether it was meant to propagate
                            raise BadRQLQuery(uquery)
                        # `answer` is known to be truthy at this point
                        return answer,
                    except Exception as ex:
                        LOGGER.exception(str(ex))
                        return uquery,

        except NamingError: # NlpTools available but no server registered
            LOGGER.warning('could not find any RQLServer object named "ivan"')
|
424 |
|