# copyright 2010-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr## This file is part of CubicWeb.## CubicWeb is free software: you can redistribute it and/or modify it under the# terms of the GNU Lesser General Public License as published by the Free# Software Foundation, either version 2.1 of the License, or (at your option)# any later version.## CubicWeb is distributed in the hope that it will be useful, but WITHOUT# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more# details.## You should have received a copy of the GNU Lesser General Public License along# with CubicWeb. If not, see <http://www.gnu.org/licenses/>."""datafeed parser for xml generated by cubicwebExample of mapping for CWEntityXMLParser:: {u'CWUser': { # EntityType (u'in_group', u'subject', u'link'): [ # (rtype, role, action) (u'CWGroup', {u'linkattr': u'name'})], # -> rules = [(EntityType, options), ...] (u'tags', u'object', u'link-or-create'): [ # (...) (u'Tag', {u'linkattr': u'name'})], # -> ... (u'use_email', u'subject', u'copy'): [ # (...) (u'EmailAddress', {})] # -> ... } }"""importurllib2importStringIOimportos.pathasospfromcookielibimportCookieJarfromdatetimeimportdatetime,timedeltafromlxmlimportetreefromlogilab.common.dateimporttodate,totimefromlogilab.common.textutilsimportsplitstrip,text_to_dictfromyams.constraintsimportBASE_CONVERTERSfromyams.schemaimportrole_nameasrnfromcubicwebimportValidationError,typed_eidfromcubicweb.server.sourcesimportdatafeeddefensure_str_keys(dic):forkeyindic:dic[str(key)]=dic.pop(key)# XXX see cubicweb.cwvreg.YAMS_TO_PY# XXX see cubicweb.web.views.xmlrss.SERIALIZERSDEFAULT_CONVERTERS=BASE_CONVERTERS.copy()DEFAULT_CONVERTERS['String']=unicodeDEFAULT_CONVERTERS['Password']=lambdax:x.encode('utf8')defconvert_date(ustr):returntodate(datetime.strptime(ustr,'%Y-%m-%d'))DEFAULT_CONVERTERS['Date']=convert_datedefconvert_datetime(ustr):if'.'inustr:# assume %Y-%m-%d %H:%M:%S.mmmmmmustr=ustr.split('.',1)[0]returndatetime.strptime(ustr,'%Y-%m-%d %H:%M:%S')DEFAULT_CONVERTERS['Datetime']=convert_datetimedefconvert_time(ustr):returntotime(datetime.strptime(ustr,'%H:%M:%S'))DEFAULT_CONVERTERS['Time']=convert_timedefconvert_interval(ustr):returntime(seconds=int(ustr))DEFAULT_CONVERTERS['Interval']=convert_interval# use a cookie enabled opener to use session cookie if any_OPENER=urllib2.build_opener()try:fromlogilab.commonimporturllib2ext_OPENER.add_handler(urllib2ext.HTTPGssapiAuthHandler())exceptImportError:# python-kerberos not availablepass_OPENER.add_handler(urllib2.HTTPCookieProcessor(CookieJar()))defextract_typed_attrs(eschema,stringdict,converters=DEFAULT_CONVERTERS):typeddict={}forrschemaineschema.subject_relations():ifrschema.finalandrschemainstringdict:ifrschema=='eid':continueattrtype=eschema.destination(rschema)typeddict[rschema.type]=converters[attrtype](stringdict[rschema])returntypeddictdef_parse_entity_etree(parent):fornodeinlist(parent):try:item={'cwtype':unicode(node.tag),'cwuri':node.attrib['cwuri'],'eid':typed_eid(node.attrib['eid']),}exceptKeyError:# cw < 3.11 compat mode XXXitem={'cwtype':unicode(node.tag),'cwuri':node.find('cwuri').text,'eid':typed_eid(node.find('eid').text),}rels={}forchildinnode:role=child.get('role')ifrole:# relationrelated=rels.setdefault(role,{}).setdefault(child.tag,[])related+=[ritemforritem,_in_parse_entity_etree(child)]else:# attributeitem[child.tag]=unicode(child.text)yielditem,relsdefbuild_search_rql(etype,attrs):restrictions=['X %(attr)s%%(%(attr)s)s'%{'attr':attr}forattrinattrs]return'Any X WHERE X is %s, %s'%(etype,', '.join(restrictions))defrtype_role_rql(rtype,role):ifrole=='object':return'Y %s X WHERE X eid %%(x)s'%rtypeelse:return'X %s Y WHERE X eid %%(x)s'%rtypedef_check_no_option(action,options,eid,_):ifoptions:msg=_("'%s' action doesn't take any options")%actionraiseValidationError(eid,{rn('options','subject'):msg})def_check_linkattr_option(action,options,eid,_):ifnot'linkattr'inoptions:msg=_("'%s' action requires 'linkattr' option")%actionraiseValidationError(eid,{rn('options','subject'):msg})classCWEntityXMLParser(datafeed.DataFeedParser):"""datafeed parser for the 'xml' entity view"""__regid__='cw.entityxml'action_options={'copy':_check_no_option,'link-or-create':_check_linkattr_option,'link':_check_linkattr_option,}def__init__(self,*args,**kwargs):super(CWEntityXMLParser,self).__init__(*args,**kwargs)self.action_methods={'copy':self.related_copy,'link-or-create':self.related_link_or_create,'link':self.related_link,}# mapping handling #########################################################defadd_schema_config(self,schemacfg,checkonly=False):"""added CWSourceSchemaConfig, modify mapping accordingly"""_=self._cw._try:rtype=schemacfg.schema.rtype.nameexceptAttributeError:msg=_("entity and relation types can't be mapped, only attributes ""or relations")raiseValidationError(schemacfg.eid,{rn('cw_for_schema','subject'):msg})ifschemacfg.options:options=text_to_dict(schemacfg.options)else:options={}try:role=options.pop('role')ifrolenotin('subject','object'):raiseKeyErrorexceptKeyError:msg=_('"role=subject" or "role=object" must be specified in options')raiseValidationError(schemacfg.eid,{rn('options','subject'):msg})try:action=options.pop('action')self.action_options[action](action,options,schemacfg.eid,_)exceptKeyError:msg=_('"action" must be specified in options; allowed values are ''%s')%', '.join(self.action_methods)raiseValidationError(schemacfg.eid,{rn('options','subject'):msg})ifnotcheckonly:ifrole=='subject':etype=schemacfg.schema.stype.namettype=schemacfg.schema.otype.nameelse:etype=schemacfg.schema.otype.namettype=schemacfg.schema.stype.nameetyperules=self.source.mapping.setdefault(etype,{})etyperules.setdefault((rtype,role,action),[]).append((ttype,options))self.source.mapping_idx[schemacfg.eid]=(etype,rtype,role,action,ttype)defdel_schema_config(self,schemacfg,checkonly=False):"""deleted CWSourceSchemaConfig, modify mapping accordingly"""etype,rtype,role,action,ttype=self.source.mapping_idx[schemacfg.eid]rules=self.source.mapping[etype][(rtype,role,action)]rules=[xforxinrulesifnotx[0]==ttype]ifnotrules:delself.source.mapping[etype][(rtype,role,action)]# import handling ##########################################################defprocess(self,url,partialcommit=True):"""IDataFeedParser main entry point"""# XXX suppression support according to source configuration. If set, get# all cwuri of entities from this source, and compare with newly# imported oneserror=Falseforitem,relsinself.parse(url):cwuri=item['cwuri']try:self.process_item(item,rels)ifpartialcommit:# commit+set_pool instead of commit(reset_pool=False) to let# other a chance to get our poolself._cw.commit()self._cw.set_pool()exceptValidationError,exc:ifpartialcommit:self.source.error('Skipping %s because of validation error %s'%(cwuri,exc))self._cw.rollback()self._cw.set_pool()error=Trueelse:raisereturnerrordefparse(self,url):ifnoturl.startswith('http'):stream=StringIO.StringIO(url)else:formappedurlinHOST_MAPPING:ifurl.startswith(mappedurl):url=url.replace(mappedurl,HOST_MAPPING[mappedurl],1)breakself.source.info('GET %s',url)stream=_OPENER.open(url)return_parse_entity_etree(etree.parse(stream).getroot())defprocess_item(self,item,rels):entity=self.extid2entity(str(item.pop('cwuri')),item.pop('cwtype'),item=item)ifnot(self.created_during_pull(entity)orself.updated_during_pull(entity)):self.notify_updated(entity)item.pop('eid')# XXX check modification dateattrs=extract_typed_attrs(entity.e_schema,item)entity.set_attributes(**attrs)for(rtype,role,action),rulesinself.source.mapping.get(entity.__regid__,{}).iteritems():try:related_items=rels[role][rtype]exceptKeyError:self.source.error('relation %s-%s not found in xml export of %s',rtype,role,entity.__regid__)continuetry:actionmethod=self.action_methods[action]exceptKeyError:raiseException('Unknown action %s'%action)actionmethod(entity,rtype,role,related_items,rules)returnentitydefbefore_entity_copy(self,entity,sourceparams):"""IDataFeedParser callback"""attrs=extract_typed_attrs(entity.e_schema,sourceparams['item'])entity.cw_edited.update(attrs)defrelated_copy(self,entity,rtype,role,others,rules):"""implementation of 'copy' action Takes no option. """assertnotany(x[1]forxinrules),"'copy' action takes no option"ttypes=set([x[0]forxinrules])others=[itemforiteminothersifitem['cwtype']inttypes]eids=[]# local eidsifnotothers:self._clear_relation(entity,rtype,role,ttypes)returnforiteminothers:item,_rels=self._complete_item(item)other_entity=self.process_item(item,[])eids.append(other_entity.eid)self._set_relation(entity,rtype,role,eids)defrelated_link(self,entity,rtype,role,others,rules):"""implementation of 'link' action requires an options to control search of the linked entity. """forttype,optionsinrules:assert'linkattr'inoptions,("'link' action requires a list of attributes used to ""search if the entity already exists")self._related_link(entity,rtype,role,ttype,others,[options['linkattr']],create_when_not_found=False)defrelated_link_or_create(self,entity,rtype,role,others,rules):"""implementation of 'link-or-create' action requires an options to control search of the linked entity. """forttype,optionsinrules:assert'linkattr'inoptions,("'link-or-create' action requires a list of attributes used to ""search if the entity already exists")self._related_link(entity,rtype,role,ttype,others,[options['linkattr']],create_when_not_found=True)def_related_link(self,entity,rtype,role,ttype,others,searchattrs,create_when_not_found):defissubset(x,y):returnall(zinyforzinx)eids=[]# local eidsforiteminothers:ifitem['cwtype']!=ttype:continueifnotissubset(searchattrs,item):item,_rels=self._complete_item(item,False)ifnotissubset(searchattrs,item):self.source.error('missing attribute, got %s expected keys %s'%item,searchattrs)continuekwargs=dict((attr,item[attr])forattrinsearchattrs)rql=build_search_rql(item['cwtype'],kwargs)rset=self._cw.execute(rql,kwargs)iflen(rset)>1:self.source.error('ambiguous link: found %s entity %s with attributes %s',len(rset),item['cwtype'],kwargs)eliflen(rset)==1:eids.append(rset[0][0])elifcreate_when_not_found:ensure_str_keys(kwargs)# XXX necessary with python < 2.6eids.append(self._cw.create_entity(item['cwtype'],**kwargs).eid)else:self.source.error('can not find %s entity with attributes %s',item['cwtype'],kwargs)ifnoteids:self._clear_relation(entity,rtype,role,(ttype,))else:self._set_relation(entity,rtype,role,eids)def_complete_item(self,item,add_relations=True):itemurl=item['cwuri']+'?vid=xml'ifadd_relations:forrtype,role,_inself.source.mapping.get(item['cwtype'],()):itemurl+='&relation=%s_%s'%(rtype,role)item_rels=list(self.parse(itemurl))assertlen(item_rels)==1returnitem_rels[0]def_clear_relation(self,entity,rtype,role,ttypes):ifentity.eidnotinself.stats['created']:iflen(ttypes)>1:typerestr=', Y is IN(%s)'%','.join(ttypes)else:typerestr=', Y is %s'%','.join(ttypes)self._cw.execute('DELETE '+rtype_role_rql(rtype,role)+typerestr,{'x':entity.eid})def_set_relation(self,entity,rtype,role,eids):rqlbase=rtype_role_rql(rtype,role)rql='DELETE %s'%rqlbaseifeids:eidstr=','.join(str(eid)foreidineids)rql+=', NOT Y eid IN (%s)'%eidstrself._cw.execute(rql,{'x':entity.eid})ifeids:ifrole=='object':rql='SET %s, Y eid IN (%s), NOT Y %s X'%(rqlbase,eidstr,rtype)else:rql='SET %s, Y eid IN (%s), NOT X %s Y'%(rqlbase,eidstr,rtype)self._cw.execute(rql,{'x':entity.eid})defregistration_callback(vreg):vreg.register_all(globals().values(),__name__)globalHOST_MAPPINGHOST_MAPPING={}ifvreg.config.apphome:host_mapping_file=osp.join(vreg.config.apphome,'hostmapping.py')ifosp.exists(host_mapping_file):HOST_MAPPING=eval(file(host_mapping_file).read())vreg.info('using host mapping %s from %s',HOST_MAPPING,host_mapping_file)