# copyright 2010-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr## This file is part of CubicWeb.## CubicWeb is free software: you can redistribute it and/or modify it under the# terms of the GNU Lesser General Public License as published by the Free# Software Foundation, either version 2.1 of the License, or (at your option)# any later version.## CubicWeb is distributed in the hope that it will be useful, but WITHOUT# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more# details.## You should have received a copy of the GNU Lesser General Public License along# with CubicWeb. If not, see <http://www.gnu.org/licenses/>."""datafeed parser for xml generated by cubicweb"""importurllib2importStringIOimportos.pathasospfromcookielibimportCookieJarfromdatetimeimportdatetime,timedeltafromlxmlimportetreefromlogilab.common.dateimporttodate,totimefromlogilab.common.textutilsimportsplitstrip,text_to_dictfromyams.constraintsimportBASE_CONVERTERSfromyams.schemaimportrole_nameasrnfromcubicwebimportValidationError,typed_eidfromcubicweb.server.sourcesimportdatafeeddefensure_str_keys(dict):forkeyindict:dict[str(key)]=dict.pop(key)# see cubicweb.web.views.xmlrss.SERIALIZERSDEFAULT_CONVERTERS=BASE_CONVERTERS.copy()DEFAULT_CONVERTERS['String']=unicodeDEFAULT_CONVERTERS['Password']=lambdax:x.encode('utf8')defconvert_date(ustr):returntodate(datetime.strptime(ustr,'%Y-%m-%d'))DEFAULT_CONVERTERS['Date']=convert_datedefconvert_datetime(ustr):if'.'inustr:# assume %Y-%m-%d %H:%M:%S.mmmmmmustr=ustr.split('.',1)[0]returndatetime.strptime(ustr,'%Y-%m-%d %H:%M:%S')DEFAULT_CONVERTERS['Datetime']=convert_datetimedefconvert_time(ustr):returntotime(datetime.strptime(ustr,'%H:%M:%S'))DEFAULT_CONVERTERS['Time']=convert_timedefconvert_interval(ustr):returntime(seconds=int(ustr))DEFAULT_CONVERTERS['Interval']=convert_interval# use a cookie enabled opener to use session cookie if any_OPENER=urllib2.build_opener()try:fromlogilab.commonimporturllib2ext_OPENER.add_handler(urllib2ext.HTTPGssapiAuthHandler())exceptImportError:# python-kerberos not availablepass_OPENER.add_handler(urllib2.HTTPCookieProcessor(CookieJar()))defextract_typed_attrs(eschema,stringdict,converters=DEFAULT_CONVERTERS):typeddict={}forrschemaineschema.subject_relations():ifrschema.finalandrschemainstringdict:ifrschema=='eid':continueattrtype=eschema.destination(rschema)typeddict[rschema.type]=converters[attrtype](stringdict[rschema])returntypeddictdef_entity_etree(parent):fornodeinlist(parent):try:item={'cwtype':unicode(node.tag),'cwuri':node.attrib['cwuri'],'eid':typed_eid(node.attrib['eid']),}exceptKeyError:# cw < 3.11 compat mode XXXitem={'cwtype':unicode(node.tag),'cwuri':node.find('cwuri').text,'eid':typed_eid(node.find('eid').text),}rels={}forchildinnode:role=child.get('role')ifchild.get('role'):# relationrelated=rels.setdefault(role,{}).setdefault(child.tag,[])related+=[ritemforritem,_in_entity_etree(child)]else:# attributeitem[child.tag]=unicode(child.text)yielditem,relsdefbuild_search_rql(etype,attrs):restrictions=[]forattrinattrs:restrictions.append('X %(attr)s%%(%(attr)s)s'%{'attr':attr})return'Any X WHERE X is %s, %s'%(etype,','.join(restrictions))defrtype_role_rql(rtype,role):ifrole=='object':return'Y %s X WHERE X eid %%(x)s'%rtypeelse:return'X %s Y WHERE X eid %%(x)s'%rtypedef_check_no_option(action,options,eid,_):ifoptions:msg=_("'%s' action doesn't take any options")%actionraiseValidationError(eid,{rn('options','subject'):msg})def_check_linkattr_option(action,options,eid,_):ifnot'linkattr'inoptions:msg=_("'%s' action require 'linkattr' option")%actionraiseValidationError(eid,{rn('options','subject'):msg})classCWEntityXMLParser(datafeed.DataFeedParser):"""datafeed parser for the 'xml' entity view"""__regid__='cw.entityxml'action_options={'copy':_check_no_option,'link-or-create':_check_linkattr_option,'link':_check_linkattr_option,}def__init__(self,*args,**kwargs):super(CWEntityXMLParser,self).__init__(*args,**kwargs)self.action_methods={'copy':self.related_copy,'link-or-create':self.related_link_or_create,'link':self.related_link,}# mapping handling #########################################################defadd_schema_config(self,schemacfg,checkonly=False):"""added CWSourceSchemaConfig, modify mapping accordingly"""_=self._cw._try:rtype=schemacfg.schema.rtype.nameexceptAttributeError:msg=_("entity and relation types can't be mapped, only attributes ""or relations")raiseValidationError(schemacfg.eid,{rn('cw_for_schema','subject'):msg})ifschemacfg.options:options=text_to_dict(schemacfg.options)else:options={}try:role=options.pop('role')ifrolenotin('subject','object'):raiseKeyErrorexceptKeyError:msg=_('"role=subject" or "role=object" must be specified in options')raiseValidationError(schemacfg.eid,{rn('options','subject'):msg})try:action=options.pop('action')self.action_options[action](action,options,schemacfg.eid,_)exceptKeyError:msg=_('"action" must be specified in options; allowed values are ''%s')%', '.join(self.action_methods)raiseValidationError(schemacfg.eid,{rn('options','subject'):msg})ifnotcheckonly:ifrole=='subject':etype=schemacfg.schema.stype.namettype=schemacfg.schema.otype.nameelse:etype=schemacfg.schema.otype.namettype=schemacfg.schema.stype.nameetyperules=self.source.mapping.setdefault(etype,{})etyperules.setdefault((rtype,role,action),[]).append((ttype,options))self.source.mapping_idx[schemacfg.eid]=(etype,rtype,role,action,ttype)defdel_schema_config(self,schemacfg,checkonly=False):"""deleted CWSourceSchemaConfig, modify mapping accordingly"""etype,rtype,role,action,ttype=self.source.mapping_idx[schemacfg.eid]rules=self.source.mapping[etype][(rtype,role,action)]rules=[xforxinrulesifnotx[0]==ttype]ifnotrules:delself.source.mapping[etype][(rtype,role,action)]# import handling ##########################################################defprocess(self,url,partialcommit=True):"""IDataFeedParser main entry point"""# XXX suppression support according to source configuration. If set, get# all cwuri of entities from this source, and compare with newly# imported oneserror=Falseforitem,relsinself.parse(url):cwuri=item['cwuri']try:self.process_item(item,rels)ifpartialcommit:# commit+set_pool instead of commit(reset_pool=False) to let# other a chance to get our poolself._cw.commit()self._cw.set_pool()exceptValidationError,exc:ifpartialcommit:self.source.error('Skipping %s because of validation error %s'%(cwuri,exc))self._cw.rollback()self._cw.set_pool()error=Trueelse:raisereturnerrordefparse(self,url):ifnoturl.startswith('http'):stream=StringIO.StringIO(url)else:formappedurlinHOST_MAPPING:ifurl.startswith(mappedurl):url=url.replace(mappedurl,HOST_MAPPING[mappedurl],1)breakself.source.info('GET %s',url)stream=_OPENER.open(url)return_entity_etree(etree.parse(stream).getroot())defprocess_one(self,url):# XXX assert len(root.children) == 1foritem,relsinself.parse(url):returnself.process_item(item,rels)defprocess_item(self,item,rels):entity=self.extid2entity(str(item.pop('cwuri')),item.pop('cwtype'),item=item)ifnot(self.created_during_pull(entity)orself.updated_during_pull(entity)):self.notify_updated(entity)item.pop('eid')# XXX check modification dateattrs=extract_typed_attrs(entity.e_schema,item)entity.set_attributes(**attrs)for(rtype,role,action),rulesinself.source.mapping.get(entity.__regid__,{}).iteritems():try:rel=rels[role][rtype]exceptKeyError:self.source.error('relation %s-%s doesn\'t seem exported in %s xml',rtype,role,entity.__regid__)continuetry:actionmethod=self.action_methods[action]exceptKeyError:raiseException('Unknown action %s'%action)actionmethod(entity,rtype,role,rel,rules)returnentitydefbefore_entity_copy(self,entity,sourceparams):"""IDataFeedParser callback"""attrs=extract_typed_attrs(entity.e_schema,sourceparams['item'])entity.cw_edited.update(attrs)defrelated_copy(self,entity,rtype,role,value,rules):"""implementation of 'copy' action Takes no option. """assertnotany(x[1]forxinrules),"'copy' action takes no option"ttypes=set([x[0]forxinrules])value=[itemforiteminvalueifitem['cwtype']inttypes]eids=[]# local eidsifnotvalue:self._clear_relation(entity,rtype,role,ttypes)returnforiteminvalue:eids.append(self.process_one(self._complete_url(item)).eid)self._set_relation(entity,rtype,role,eids)defrelated_link(self,entity,rtype,role,value,rules):"""implementation of 'link' action requires an options to control search of the linked entity. """forttype,optionsinrules:assert'linkattr'inoptions,("'link-or-create' action require a list of attributes used to ""search if the entity already exists")self._related_link(entity,rtype,role,ttype,value,[options['linkattr']],self._log_not_found)defrelated_link_or_create(self,entity,rtype,role,value,rules):"""implementation of 'link-or-create' action requires an options to control search of the linked entity. """forttype,optionsinrules:assert'linkattr'inoptions,("'link-or-create' action require a list of attributes used to ""search if the entity already exists")self._related_link(entity,rtype,role,ttype,value,[options['linkattr']],self._create_not_found)def_log_not_found(self,entity,rtype,role,ritem,searchvalues):self.source.error('can find %s entity with attributes %s',ritem['cwtype'],searchvalues)def_create_not_found(self,entity,rtype,role,ritem,searchvalues):ensure_str_keys(searchvalues)# XXX necessary with python < 2.6returnself._cw.create_entity(ritem['cwtype'],**searchvalues).eiddef_related_link(self,entity,rtype,role,ttype,value,searchattrs,notfound_callback):eids=[]# local eidsforiteminvalue:ifitem['cwtype']!=ttype:continueifnotall(attrinitemforattrinsearchattrs):# need to fetch related entity's xmlritems=list(self.parse(self._complete_url(item,False)))assertlen(ritems)==1,'unexpected xml'ritem=ritems[0][0]# list of 2-uplesassertall(attrinritemforattrinsearchattrs), \'missing attribute, got %s expected keys %s'%(item,searchattrs)else:ritem=itemkwargs=dict((attr,ritem[attr])forattrinsearchattrs)rql=build_search_rql(item['cwtype'],kwargs)rset=self._cw.execute(rql,kwargs)ifrset:assertlen(rset)==1eids.append(rset[0][0])else:eid=notfound_callback(entity,rtype,role,ritem,kwargs)ifeidisnotNone:eids.append(eid)ifnoteids:self._clear_relation(entity,rtype,role,(ttype,))else:self._set_relation(entity,rtype,role,eids)def_complete_url(self,item,add_relations=True):itemurl=item['cwuri']+'?vid=xml'forrtype,role,_inself.source.mapping.get(item['cwtype'],()):itemurl+='&relation=%s_%s'%(rtype,role)returnitemurldef_clear_relation(self,entity,rtype,role,ttypes):ifentity.eidnotinself.stats['created']:iflen(ttypes)>1:typerestr=', Y is IN(%s)'%','.join(ttypes)else:typerestr=', Y is %s'%','.join(ttypes)self._cw.execute('DELETE '+rtype_role_rql(rtype,role)+typerestr,{'x':entity.eid})def_set_relation(self,entity,rtype,role,eids):eidstr=','.join(str(eid)foreidineids)rql=rtype_role_rql(rtype,role)self._cw.execute('DELETE %s, NOT Y eid IN (%s)'%(rql,eidstr),{'x':entity.eid})ifrole=='object':rql='SET %s, Y eid IN (%s), NOT Y %s X'%(rql,eidstr,rtype)else:rql='SET %s, Y eid IN (%s), NOT X %s Y'%(rql,eidstr,rtype)self._cw.execute(rql,{'x':entity.eid})defregistration_callback(vreg):vreg.register_all(globals().values(),__name__)globalHOST_MAPPINGHOST_MAPPING={}ifvreg.config.apphome:host_mapping_file=osp.join(vreg.config.apphome,'hostmapping.py')ifosp.exists(host_mapping_file):HOST_MAPPING=eval(file(host_mapping_file).read())vreg.info('using host mapping %s from %s',HOST_MAPPING,host_mapping_file)