"""a query preprocesser to handle quick search shortcuts for cubicweb:organization: Logilab:copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr"""__docformat__="restructuredtext en"importrefromloggingimportgetLoggerfromrqlimportRQLSyntaxError,BadRQLQuery,parsefromrql.nodesimportRelationfromcubicwebimportUnauthorizedfromcubicweb.common.appobjectimportComponent,SingletonComponentLOGGER=getLogger('cubicweb.magicsearch')def_get_approriate_translation(translations_found,eschema):"""return the first (should be the only one) possible translation according to the given entity type """# get the list of all attributes / relations for this kind of entityexisting_relations=set(eschema.subject_relations())consistent_translations=translations_found&existing_relationsiflen(consistent_translations)==0:returnNonereturnconsistent_translations.pop()deftranslate_rql_tree(rqlst,translations,schema):"""Try to translate each relation in the RQL syntax tree :type rqlst: `rql.stmts.Statement` :param rqlst: the RQL syntax tree :type translations: dict :param translations: the reverted l10n dict :type schema: `cubicweb.schema.Schema` :param schema: the application's schema """# var_types is used as a map : var_name / var_typevartypes={}# ambiguous_nodes is used as a map : relation_node / (var_name, available_translations)ambiguous_nodes={}# For each relation node, check if it's a localized relation name# If it's a localized name, then use the original relation name, else# keep the existing relation nameforrelationinrqlst.get_nodes(Relation):rtype=relation.r_typelhs,rhs=relation.get_variable_parts()ifrtype=='is':try:etype=translations[rhs.value]rhs.value=etypeexceptKeyError:# If no translation found, leave the entity type as isetype=rhs.value# Memorize variable's typevartypes[lhs.name]=etypeelse:try:translation_set=translations[rtype]exceptKeyError:pass# If no translation found, leave the relation type as iselse:# Only one possible translation, no ambiguityiflen(translation_set)==1:relation.r_type=iter(translations[rtype]).next()# More than 1 possible translation => resolve it laterelse:ambiguous_nodes[relation]=(lhs.name,translation_set)ifambiguous_nodes:resolve_ambiguities(vartypes,ambiguous_nodes,schema)defresolve_ambiguities(var_types,ambiguous_nodes,schema):"""Tries to resolve remaining ambiguities for translation /!\ An ambiguity is when two different string can be localized with the same string A simple example: - 'name' in a company context will be localized as 'nom' in French - but ... 'surname' will also be localized as 'nom' :type var_types: dict :param var_types: a map : var_name / var_type :type ambiguous_nodes: dict :param ambiguous_nodes: a map : relation_node / (var_name, available_translations) :type schema: `cubicweb.schema.Schema` :param schema: the application's schema """# Now, try to resolve ambiguous translationsforrelation,(var_name,translations_found)inambiguous_nodes.items():try:vartype=var_types[var_name]exceptKeyError:continue# Get schema for this entity typeeschema=schema.eschema(vartype)rtype=_get_approriate_translation(translations_found,eschema)ifrtypeisNone:continuerelation.r_type=rtypeQUOTED_SRE=re.compile(r'(.*?)(["\'])(.+?)\2')TRANSLATION_MAPS={}deftrmap(config,schema,lang):try:returnTRANSLATION_MAPS[lang]exceptKeyError:assertlanginconfig.translations,'%s%s'%(lang,config.translations)tr=config.translations[lang]langmap={}foretypeinschema.entities():etype=str(etype)langmap[tr(etype).capitalize()]=etypelangmap[etype.capitalize()]=etypeforrtypeinschema.relations():rtype=str(rtype)langmap.setdefault(tr(rtype).lower(),set()).add(rtype)langmap.setdefault(rtype,set()).add(rtype)TRANSLATION_MAPS[lang]=langmapreturnlangmapclassBaseQueryProcessor(Component):__abstract__=Trueid='magicsearch_processor'# set something if you want explicit component search facility for the# componentname=Nonedefprocess_query(self,uquery,req):args=self.preprocess_query(uquery,req)try:returnreq.execute(*args)finally:# rollback necessary to avoid leaving the connection in a bad statereq.cnx.rollback()defpreprocess_query(self,uquery,req):raiseNotImplementedError()classDoNotPreprocess(BaseQueryProcessor):"""this one returns the raw query and should be placed in first position of the chain """name='rql'priority=0defpreprocess_query(self,uquery,req):returnuquery,classQueryTranslator(BaseQueryProcessor):""" parses through rql and translates into schema language entity names and attributes """priority=2defpreprocess_query(self,uquery,req):try:rqlst=parse(uquery,print_errors=False)except(RQLSyntaxError,BadRQLQuery),err:returnuquery,schema=self.vreg.schema# rql syntax tree will be modified in place if necessarytranslate_rql_tree(rqlst,trmap(self.config,schema,req.lang),schema)returnrqlst.as_string(),classQSPreProcessor(BaseQueryProcessor):"""Quick search preprocessor preprocessing query in shortcut form to their RQL form """priority=4defpreprocess_query(self,uquery,req):""""""args=Noneself.req=reqtry:# Process as if there was a quoted partargs=self._quoted_words_query(uquery)## No quoted part exceptBadRQLQuery:words=uquery.split()iflen(words)==1:args=self._one_word_query(*words)eliflen(words)==2:args=self._two_words_query(*words)eliflen(words)==3:args=self._three_words_query(*words)else:args=self._multiple_words_query(words)returnargsdef_get_entity_type(self,word):"""check if the given word is matching an entity type, return it if it's the case or raise BadRQLQuery if not """etype=word.capitalize()try:returntrmap(self.config,self.vreg.schema,self.req.lang)[etype]exceptKeyError:raiseBadRQLQuery('%s is not a valid entity name'%etype)def_get_attribute_name(self,word,eschema):"""check if the given word is matching an attribute of the given entity type, return it normalized if found or return it untransformed else """"""Returns the attributes's name as stored in the DB"""# Need to convert from unicode to string (could be whatever)rtype=word.lower()# Find the entity name as stored in the DBtranslations=trmap(self.config,self.vreg.schema,self.req.lang)try:translations=translations[rtype]exceptKeyError:raiseBadRQLQuery('%s is not a valid attribute for %s entity type'%(word,eschema))rtype=_get_approriate_translation(translations,eschema)ifrtypeisNone:raiseBadRQLQuery('%s is not a valid attribute for %s entity type'%(word,eschema))returnrtypedef_one_word_query(self,word):"""Specific process for one word query (case (1) of preprocess_rql) """# if this is an integer, then directly go to eidtry:eid=int(word)return'Any X WHERE X eid %(x)s',{'x':eid},'x'exceptValueError:etype=self._get_entity_type(word)return'%s%s'%(etype,etype[0]),def_complete_rql(self,searchstr,etype,rtype=None,var=None,searchattr=None):searchop=''if'%'insearchstr:ifrtype:possible_etypes=self.schema.rschema(rtype).objects(etype)else:possible_etypes=[self.schema.eschema(etype)]ifsearchattrorlen(possible_etypes)==1:searchattr=searchattrorpossible_etypes[0].main_attribute()searchop='LIKE 'searchattr=searchattror'has_text'ifvarisNone:var=etype[0]return'%s%s%s%%(text)s'%(var,searchattr,searchop)def_two_words_query(self,word1,word2):"""Specific process for two words query (case (2) of preprocess_rql) """etype=self._get_entity_type(word1)# this is a valid RQL query : ("Person X", or "Person TMP1")iflen(word2)==1andword2.isupper():return'%s%s'%(etype,word2),# else, suppose it's a shortcut like : Person Smithrql='%s%s WHERE %s'%(etype,etype[0],self._complete_rql(word2,etype))returnrql,{'text':word2}def_three_words_query(self,word1,word2,word3):"""Specific process for three words query (case (3) of preprocess_rql) """etype=self._get_entity_type(word1)eschema=self.schema.eschema(etype)rtype=self._get_attribute_name(word2,eschema)# expand shortcut if rtype is a non final relationifnotself.schema.rschema(rtype).is_final():returnself._expand_shortcut(etype,rtype,word3)if'%'inword3:searchop='LIKE 'else:searchop=''rql='%s%s WHERE %s'%(etype,etype[0],self._complete_rql(word3,etype,searchattr=rtype))returnrql,{'text':word3}def_multiple_words_query(self,words):"""specific process for more than 3 words query"""return' '.join(words),def_expand_shortcut(self,etype,rtype,searchstr):"""Expands shortcut queries on a non final relation to use has_text or the main attribute (according to possible entity type) if '%' is used in the search word Transforms : 'person worksat IBM' into 'Personne P WHERE P worksAt C, C has_text "IBM"' """# check out all possilbe entity types for the relation represented# by 'rtype'mainvar=etype[0]searchvar=mainvar+'1'rql='%s%s WHERE %s%s%s, %s'%(etype,mainvar,# Person Pmainvar,rtype,searchvar,# P worksAt Cself._complete_rql(searchstr,etype,rtype=rtype,var=searchvar))returnrql,{'text':searchstr}def_quoted_words_query(self,ori_rql):"""Specific process when there's a "quoted" part """m=QUOTED_SRE.match(ori_rql)# if there's no quoted part, then no special pre-processing to doifmisNone:raiseBadRQLQuery("unable to handle request %r"%ori_rql)left_words=m.group(1).split()quoted_part=m.group(3)# Case (1) : Company "My own company"iflen(left_words)==1:try:word1=left_words[0]returnself._two_words_query(word1,quoted_part)exceptBadRQLQuery,error:raiseBadRQLQuery("unable to handle request %r"%ori_rql)# Case (2) : Company name "My own company";eliflen(left_words)==2:word1,word2=left_wordsreturnself._three_words_query(word1,word2,quoted_part)# return ori_rqlraiseBadRQLQuery("unable to handle request %r"%ori_rql)classFullTextTranslator(BaseQueryProcessor):priority=10name='text'defpreprocess_query(self,uquery,req):"""suppose it's a plain text query"""return'Any X WHERE X has_text %(text)s',{'text':uquery}classMagicSearchComponent(SingletonComponent):id='magicsearch'def__init__(self,req,rset=None):super(MagicSearchComponent,self).__init__(req,rset)processors=[]self.by_name={}forprocessorclsinself.vreg.registry_objects('components','magicsearch_processor'):# instantiation neededprocessor=processorcls()processors.append(processor)ifprocessor.nameisnotNone:assertnotprocessor.nameinself.by_nameself.by_name[processor.name.lower()]=processorself.processors=sorted(processors,key=lambdax:x.priority)defprocess_query(self,uquery,req):assertisinstance(uquery,unicode)try:procname,query=uquery.split(':',1)proc=self.by_name[procname.strip().lower()]uquery=query.strip()except:# use processor chainunauthorized=Noneforprocinself.processors:try:returnproc.process_query(uquery,req)# FIXME : we don't want to catch any exception type here !except(RQLSyntaxError,BadRQLQuery):passexceptUnauthorized,ex:unauthorized=excontinueexceptException,ex:LOGGER.debug('%s: %s',ex.__class__.__name__,ex)continueifunauthorized:raiseunauthorizedelse:# let exception propagatereturnproc.process_query(uquery,req)raiseBadRQLQuery(req._('sorry, the server is unable to handle this query'))# Do not make a strong dependency on NlpToolstry:fromNlpTools.rqltools.clientimportRQLClientexceptImportError:LOGGER.info('could not import RQLClient (NlpTools)')else:try:fromPyro.errorsimportNamingErrorexceptImportError:LOGGER.warning("pyro is not installed, can't try to connect to nlp server")else:try:classNLPProcessor(BaseQueryProcessor):priority=8nlp_agent=RQLClient('ivan')defpreprocess_query(self,uquery,req):try:answer=self.nlp_agent.get_translation(uquery)ifnotanswer:raiseBadRQLQuery(uquery)returnansweroruquery,exceptException,ex:LOGGER.exception(str(ex))returnuquery,exceptNamingError:# NlpTools available but no server registeredLOGGER.warning('could not find any RQLServer object named "ivan"')