# copyright 2010-2012 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of CubicWeb.
#
# CubicWeb is free software: you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
"""datafeed parser for xml generated by cubicweb

Example of mapping for CWEntityXMLParser::

  {u'CWUser': {                                        # EntityType
      (u'in_group', u'subject', u'link'): [            # (rtype, role, action)
          (u'CWGroup', {u'linkattr': u'name'})],       #   -> rules = [(EntityType, options), ...]
      (u'tags', u'object', u'link-or-create'): [       # (...)
          (u'Tag', {u'linkattr': u'name'})],           #   -> ...
      (u'use_email', u'subject', u'copy'): [           # (...)
          (u'EmailAddress', {})]                       #   -> ...
      }
   }
"""

from datetime import datetime, timedelta, time
from urllib import urlencode
from cgi import parse_qs # in urlparse with python >= 2.6

from logilab.common.date import todate, totime
from logilab.common.textutils import splitstrip, text_to_dict
from logilab.common.decorators import classproperty

from yams.constraints import BASE_CONVERTERS
from yams.schema import role_name as rn

from cubicweb import ValidationError, RegistryException
from cubicweb.view import Component
from cubicweb.server.sources import datafeed
from cubicweb.server.hook import match_rtype

# Mapping from attribute type name to a callable turning the string found in
# the xml export into a python value.
# XXX see cubicweb.cwvreg.YAMS_TO_PY
# XXX see cubicweb.web.views.xmlrss.SERIALIZERS
DEFAULT_CONVERTERS = BASE_CONVERTERS.copy()
DEFAULT_CONVERTERS['String'] = unicode
DEFAULT_CONVERTERS['Password'] = lambda x: x.encode('utf8')


def convert_date(ustr):
    """Parse a '%Y-%m-%d' string and pass the result through `todate`."""
    return todate(datetime.strptime(ustr, '%Y-%m-%d'))
DEFAULT_CONVERTERS['Date'] = convert_date


def convert_datetime(ustr):
    """Parse a '%Y-%m-%d %H:%M:%S' string into a datetime.

    Anything following the first '.' (fractional seconds) is dropped before
    parsing.
    """
    if '.' in ustr: # assume %Y-%m-%d %H:%M:%S.mmmmmm
        ustr = ustr.split('.', 1)[0]
    return datetime.strptime(ustr, '%Y-%m-%d %H:%M:%S')
DEFAULT_CONVERTERS['Datetime'] = convert_datetime
# XXX handle timezone, though this will be enough as TZDatetime are
# serialized without time zone by default (UTC time). See
# cw.web.views.xmlrss.SERIALIZERS.
DEFAULT_CONVERTERS['TZDatetime'] = convert_datetime


def convert_time(ustr):
    """Parse a '%H:%M:%S' string and pass the result through `totime`."""
    return totime(datetime.strptime(ustr, '%H:%M:%S'))
DEFAULT_CONVERTERS['Time'] = convert_time
DEFAULT_CONVERTERS['TZTime'] = convert_time


def convert_interval(ustr):
    """Turn a string holding an integer number of seconds into a timedelta."""
    # `datetime.time` has no `seconds` argument: a duration is a timedelta
    return timedelta(seconds=int(ustr))
DEFAULT_CONVERTERS['Interval'] = convert_interval


def extract_typed_attrs(eschema, stringdict, converters=DEFAULT_CONVERTERS):
    """Return a dictionary of typed attribute values for an entity type.

    For each final subject relation of `eschema` which is a key of
    `stringdict` (except 'eid', 'cwuri', 'cwtype' and 'cwsource'), the value
    is converted using the callable registered in `converters` for the
    attribute's type. None values are kept as None.

    :param eschema: entity schema whose attributes should be extracted
    :param stringdict: {attribute: string value or None} dictionary
    :param converters: {attribute type: callable} dictionary (not modified)
    :return: {attribute name: typed value} dictionary
    """
    typeddict = {}
    for rschema in eschema.subject_relations():
        if rschema.final and rschema in stringdict:
            if rschema in ('eid', 'cwuri', 'cwtype', 'cwsource'):
                continue
            attrtype = eschema.destination(rschema)
            value = stringdict[rschema]
            if value is not None:
                value = converters[attrtype](value)
            typeddict[rschema.type] = value
    return typeddict


def rtype_role_rql(rtype, role):
    """Return the base RQL fragment selecting the relation `rtype`.

    X is always the entity given by the 'x' query argument; it is the object
    of the relation when `role` is 'object', its subject otherwise.
    """
    if role == 'object':
        return 'Y %s X WHERE X eid %%(x)s' % rtype
    else:
        return 'X %s Y WHERE X eid %%(x)s' % rtype


class CWEntityXMLParser(datafeed.DataFeedXMLParser):
    """datafeed parser for the 'xml' entity view

    Most of the logic is delegated to the following components:

    * an "item builder" component, turning an etree xml node into a specific
      python dictionary representing an entity

    * "action" components, selected given an entity, a relation and its role in
      the relation, and responsible to link the entity to given related items
      (eg dictionary)

    So the parser is only doing the gluing service and the connection to the
    source.
    """
    __regid__ = 'cw.entityxml'

    def __init__(self, *args, **kwargs):
        super(CWEntityXMLParser, self).__init__(*args, **kwargs)
        # cache of (item, rels) tuples already fetched, indexed by cwuri
        self._parsed_urls = {}
        # eids of entities already handled by `process_item`
        self._processed_entities = set()

    def select_linker(self, action, rtype, role, entity=None):
        """Return the 'cw.entityxml.action.<action>' component selected for
        the given relation type, role and entity.

        :raise RegistryException: with an 'Unknown action' message when the
          selection fails
        """
        try:
            return self._cw.vreg['components'].select(
                'cw.entityxml.action.%s' % action, self._cw, entity=entity,
                rtype=rtype, role=role, parser=self)
        except RegistryException:
            raise RegistryException('Unknown action %s' % action)

    def list_actions(self):
        """Return the sorted list of action names, taken from the first class
        registered under each 'cw.entityxml.action.*' identifier.
        """
        reg = self._cw.vreg['components']
        return sorted(clss[0].action for rid, clss in reg.iteritems()
                      if rid.startswith('cw.entityxml.action.'))

    # mapping handling #########################################################

    def add_schema_config(self, schemacfg, checkonly=False):
        """added CWSourceSchemaConfig, modify mapping accordingly

        Options of `schemacfg` must hold a 'role' ('subject' or 'object') and
        an 'action' matching a registered linker; remaining options are
        checked by that linker. The source mapping is only modified when
        `checkonly` is false.

        :raise ValidationError: if the configuration doesn't target an
          attribute or relation definition, or if its options are invalid
        """
        _ = self._cw._
        try:
            rtype = schemacfg.schema.rtype.name
        except AttributeError:
            msg = _("entity and relation types can't be mapped, only attributes "
                    "or relations")
            raise ValidationError(schemacfg.eid,
                                  {rn('cw_for_schema', 'subject'): msg})
        if schemacfg.options:
            options = text_to_dict(schemacfg.options)
        else:
            options = {}
        try:
            role = options.pop('role')
            if role not in ('subject', 'object'):
                raise KeyError
        except KeyError:
            msg = _('"role=subject" or "role=object" must be specified in options')
            raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg})
        try:
            action = options.pop('action')
            linker = self.select_linker(action, rtype, role)
            linker.check_options(options, schemacfg.eid)
        except KeyError:
            msg = _('"action" must be specified in options; allowed values are '
                    '%s') % ', '.join(self.list_actions())
            raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg})
        except RegistryException:
            msg = _('allowed values for "action" are %s') % ', '.join(
                self.list_actions())
            raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg})
        if not checkonly:
            # the mapping is indexed by the type of the entity playing `role`
            if role == 'subject':
                etype = schemacfg.schema.stype.name
                ttype = schemacfg.schema.otype.name
            else:
                etype = schemacfg.schema.otype.name
                ttype = schemacfg.schema.stype.name
            etyperules = self.source.mapping.setdefault(etype, {})
            etyperules.setdefault((rtype, role, action), []).append(
                (ttype, options))
            self.source.mapping_idx[schemacfg.eid] = (
                etype, rtype, role, action, ttype)

    def del_schema_config(self, schemacfg, checkonly=False):
        """deleted CWSourceSchemaConfig, modify mapping accordingly

        Rules targeting the configuration's target type are removed from the
        source mapping; the (rtype, role, action) key itself is removed once
        no rule is left for it.

        NOTE(review): `checkonly` is not taken into account here, as in the
        original implementation -- confirm this is intended.
        """
        etype, rtype, role, action, ttype = self.source.mapping_idx[schemacfg.eid]
        etyperules = self.source.mapping[etype]
        key = (rtype, role, action)
        remaining = [rule for rule in etyperules[key] if rule[0] != ttype]
        if remaining:
            # store the filtered list back, else the rule would stay mapped
            etyperules[key] = remaining
        else:
            del etyperules[key]

    # import handling ##########################################################

    def process(self, url, raise_on_error=False):
        """IDataFeedParser main entry point"""
        if url.startswith('http'): # XXX similar loose test as in parse of sources.datafeed
            url = self.complete_url(url)
        super(CWEntityXMLParser, self).process(url, raise_on_error)

    def parse_etree(self, parent):
        """Yield what the item builder component returns for each child node
        of `parent`.
        """
        for node in list(parent):
            builder = self._cw.vreg['components'].select(
                'cw.entityxml.item-builder', self._cw, node=node,
                parser=self)
            yield builder.build_item()

    def process_item(self, item, rels):
        """
        item and rels are what's returned by the item builder `build_item` method:

        * `item` is an {attribute: value} dictionary
        * `rels` is for relations and structured as
           {role: {relation: [(related item, related rels)...]}

        Return the matching local entity, or None when `extid2entity` gives no
        entity. An entity is handled at most once per parser instance.
        """
        entity = self.extid2entity(str(item['cwuri']), item['cwtype'],
                                   cwsource=item['cwsource'], item=item)
        if entity is None:
            return None
        if entity.eid in self._processed_entities:
            return entity
        self._processed_entities.add(entity.eid)
        if not (self.created_during_pull(entity)
                or self.updated_during_pull(entity)):
            attrs = extract_typed_attrs(entity.e_schema, item)
            self.update_if_necessary(entity, attrs)
        self.process_relations(entity, rels)
        return entity

    def process_relations(self, entity, rels):
        """Link `entity` to its related items, for each relation configured in
        the source mapping for its type.

        Relations missing from `rels` and actions without a linker are
        recorded as errors in the import log and skipped.
        """
        etype = entity.cw_etype
        for (rtype, role, action), rules in self.source.mapping.get(etype, {}).iteritems():
            try:
                related_items = rels[role][rtype]
            except KeyError:
                self.import_log.record_error('relation %s-%s not found in xml export of %s'
                                             % (rtype, role, etype))
                continue
            try:
                linker = self.select_linker(action, rtype, role, entity)
            except RegistryException:
                self.import_log.record_error('no linker for action %s' % action)
            else:
                linker.link_items(related_items, rules)

    def before_entity_copy(self, entity, sourceparams):
        """IDataFeedParser callback"""
        attrs = extract_typed_attrs(entity.e_schema, sourceparams['item'])
        entity.cw_edited.update(attrs)

    def normalize_url(self, url):
        """overridden to add vid=xml"""
        url = super(CWEntityXMLParser, self).normalize_url(url)
        if url.startswith('http'):
            try:
                url, qs = url.split('?', 1)
            except ValueError:
                params = {}
            else:
                params = parse_qs(qs)
            if 'vid' not in params:
                params['vid'] = ['xml']
            return url + '?' + self._cw.build_url_params(**params)
        return url

    def complete_url(self, url, etype=None, known_relations=None):
        """append to the url's query string information about relation that should
        be included in the resulting xml, according to source mapping.

        If etype is not specified, try to guess it using the last path part of
        the url, i.e. the format used by default in cubicweb to map all entities
        of a given type as in 'http://mysite.org/EntityType'.

        If `known_relations` is given, it should be a dictionary of already
        known relations, so they don't get queried again.
        """
        try:
            url, qs = url.split('?', 1)
        except ValueError:
            qs = ''
        # XXX vid will be added by later call to normalize_url (in parent class)
        params = parse_qs(qs)
        if etype is None:
            try:
                etype = url.rsplit('/', 1)[1]
            except IndexError:
                # no '/' in the url: rsplit returns a single element list
                return url + '?' + self._cw.build_url_params(**params)
            try:
                etype = self._cw.vreg.case_insensitive_etypes[etype.lower()]
            except KeyError:
                return url + '?' + self._cw.build_url_params(**params)
        relations = params.setdefault('relation', [])
        for rtype, role, _action in self.source.mapping.get(etype, ()):
            # `known_relations` is indexed by role, then by relation type
            if known_relations and rtype in known_relations.get(role, ()):
                continue
            reldef = '%s-%s' % (rtype, role)
            if reldef not in relations:
                relations.append(reldef)
        return url + '?' + self._cw.build_url_params(**params)

    def complete_item(self, item, rels):
        """Return the complete (item, rels) tuple for `item`, fetched from its
        cwuri unless it has already been fetched by this parser.

        When fetching, relations already known through `rels` are merged into
        the fetched relations.
        """
        try:
            return self._parsed_urls[item['cwuri']]
        except KeyError:
            itemurl = self.complete_url(item['cwuri'], item['cwtype'], rels)
            item_rels = list(self.parse(itemurl))
            assert len(item_rels) == 1, 'url %s expected to bring back one '\
                   'and only one entity, got %s' % (itemurl, len(item_rels))
            self._parsed_urls[item['cwuri']] = item_rels[0]
            if rels:
                # XXX (do it better) merge relations
                new_rels = item_rels[0][1]
                for role in ('subject', 'object'):
                    known = rels.get(role)
                    if known:
                        # setdefault so that known relations are kept even if
                        # nothing has been fetched for this role
                        new_rels.setdefault(role, {}).update(known)
            return item_rels[0]


class CWEntityXMLItemBuilder(Component):
    """Component turning an etree xml node into an (item, rels) tuple."""
    __regid__ = 'cw.entityxml.item-builder'

    def __init__(self, _cw, parser, node, **kwargs):
        super(CWEntityXMLItemBuilder, self).__init__(_cw, **kwargs)
        self.parser = parser
        self.node = node

    def build_item(self):
        """parse a XML document node and return two dictionaries defining (part
        of) an entity:

        - {attribute: value}
        - {role: {relation: [(related item, related rels)...]}
        """
        node = self.node
        item = dict(node.attrib.items())
        item['cwtype'] = unicode(node.tag)
        item.setdefault('cwsource', None)
        try:
            item['eid'] = int(item['eid'])
        except KeyError:
            # cw < 3.11 compat mode XXX
            item['eid'] = int(node.find('eid').text)
            item['cwuri'] = node.find('cwuri').text
        rels = {}
        for child in node:
            role = child.get('role')
            if role:
                # relation
                related = rels.setdefault(role, {}).setdefault(child.tag, [])
                related += self.parser.parse_etree(child)
            elif child.text:
                # attribute
                item[child.tag] = unicode(child.text)
            else:
                # None attribute (empty tag)
                item[child.tag] = None
        return item, rels


class CWEntityXMLActionCopy(Component):
    """implementation of cubicweb entity xml parser's 'copy' action

    Takes no option.
    """
    __regid__ = 'cw.entityxml.action.copy'

    def __init__(self, _cw, parser, rtype, role, entity=None, **kwargs):
        super(CWEntityXMLActionCopy, self).__init__(_cw, **kwargs)
        self.parser = parser
        self.rtype = rtype
        self.role = role
        self.entity = entity

    @classproperty
    def action(cls):
        """Name of the action, i.e. the last dotted part of `__regid__`."""
        return cls.__regid__.rsplit('.', 1)[-1]

    def check_options(self, options, eid):
        """Raise ValidationError if any option is given."""
        self._check_no_options(options, eid)

    def _check_no_options(self, options, eid, msg=None):
        """Raise ValidationError on entity `eid` if `options` is not empty,
        using `msg` or a default message.
        """
        if options:
            if msg is None:
                msg = self._cw._("'%s' action doesn't take any options") % self.action
            raise ValidationError(eid, {rn('options', 'subject'): msg})

    def link_items(self, others, rules):
        """Process related items whose type is targeted by `rules`, then set
        the relation to the resulting entities, or clear it if there are none.

        :param others: list of (item, rels) tuples
        :param rules: list of (target type, options) tuples
        """
        assert not any(x[1] for x in rules), "'copy' action takes no option"
        ttypes = frozenset([x[0] for x in rules])
        eids = [] # local eids
        for item, rels in others:
            if item['cwtype'] in ttypes:
                item, rels = self.parser.complete_item(item, rels)
                other_entity = self.parser.process_item(item, rels)
                if other_entity is not None:
                    eids.append(other_entity.eid)
        if eids:
            self._set_relation(eids)
        else:
            self._clear_relation(ttypes)

    def _clear_relation(self, ttypes):
        """Delete relations from the entity to entities of the given types,
        unless the entity has been created during this pull.
        """
        if not self.parser.created_during_pull(self.entity):
            if len(ttypes) > 1:
                typerestr = ', Y is IN(%s)' % ','.join(ttypes)
            else:
                typerestr = ', Y is %s' % ','.join(ttypes)
            self._cw.execute('DELETE ' + rtype_role_rql(self.rtype, self.role) + typerestr,
                             {'x': self.entity.eid})

    def _set_relation(self, eids):
        """Delete relations to entities not in `eids`, then add the missing
        relations to entities in `eids`.
        """
        assert eids
        rtype = self.rtype
        rqlbase = rtype_role_rql(rtype, self.role)
        eidstr = ','.join(str(eid) for eid in eids)
        self._cw.execute('DELETE %s, NOT Y eid IN (%s)' % (rqlbase, eidstr),
                         {'x': self.entity.eid})
        if self.role == 'object':
            rql = 'SET %s, Y eid IN (%s), NOT Y %s X' % (rqlbase, eidstr, rtype)
        else:
            rql = 'SET %s, Y eid IN (%s), NOT X %s Y' % (rqlbase, eidstr, rtype)
        self._cw.execute(rql, {'x': self.entity.eid})


class CWEntityXMLActionLink(CWEntityXMLActionCopy):
    """implementation of cubicweb entity xml parser's 'link' action

    requires a 'linkattr' option to control search of the linked entity.
    """
    __regid__ = 'cw.entityxml.action.link'

    def check_options(self, options, eid):
        """Raise ValidationError if the 'linkattr' option is missing."""
        if 'linkattr' not in options:
            msg = self._cw._("'%s' action requires 'linkattr' option") % self.action
            raise ValidationError(eid, {rn('options', 'subject'): msg})

    # whether an entity should be created when no matching one is found
    create_when_not_found = False

    def link_items(self, others, rules):
        """Link related items for each (target type, options) rule, searching
        entities through the attributes listed in the 'linkattr' option.
        """
        for ttype, options in rules:
            searchattrs = splitstrip(options.get('linkattr', ''))
            self._related_link(ttype, others, searchattrs)

    def _related_link(self, ttype, others, searchattrs):
        """Find (or create, see `create_when_not_found`) the local entity
        matching each item of type `ttype`, then set the relation to found
        entities, or clear it if there are none.

        Items for which zero or several entities are found, or lacking a
        search attribute, are recorded as errors in the import log.
        """
        def issubset(x, y):
            return all(z in y for z in x)
        eids = [] # local eids
        log = self.parser.import_log
        for item, rels in others:
            if item['cwtype'] != ttype:
                continue
            if not issubset(searchattrs, item):
                # search attributes missing from the embedded item: fetch it
                item, rels = self.parser.complete_item(item, rels)
                if not issubset(searchattrs, item):
                    log.record_error('missing attribute, got %s expected keys %s'
                                     % (item, searchattrs))
                    continue
            # XXX str() needed with python < 2.6
            kwargs = dict((str(attr), item[attr]) for attr in searchattrs)
            targets = self._find_entities(item, kwargs)
            if len(targets) == 1:
                entity = targets[0]
            elif not targets and self.create_when_not_found:
                entity = self._cw.create_entity(item['cwtype'], **kwargs)
            else:
                if len(targets) > 1:
                    log.record_error('ambiguous link: found %s entity %s with attributes %s'
                                     % (len(targets), item['cwtype'], kwargs))
                else:
                    log.record_error('can not find %s entity with attributes %s'
                                     % (item['cwtype'], kwargs))
                continue
            eids.append(entity.eid)
            self.parser.process_relations(entity, rels)
        if eids:
            self._set_relation(eids)
        else:
            self._clear_relation((ttype,))

    def _find_entities(self, item, kwargs):
        """Return the tuple of entities of the item's type matching `kwargs`."""
        return tuple(self._cw.find_entities(item['cwtype'], **kwargs))


class CWEntityXMLActionLinkInState(CWEntityXMLActionLink):
    """custom implementation of cubicweb entity xml parser's 'link' action for
    in_state relation
    """
    __select__ = match_rtype('in_state')

    def check_options(self, options, eid):
        """Raise ValidationError unless 'name' is one of the attributes listed
        in the 'linkattr' option.
        """
        super(CWEntityXMLActionLinkInState, self).check_options(options, eid)
        # parse the option as `link_items` does, rather than searching for a
        # substring which would accept e.g. 'surname'
        if 'name' not in splitstrip(options['linkattr']):
            msg = self._cw._("'%s' action for in_state relation should at "
                             "least have 'linkattr=name' option") % self.action
            raise ValidationError(eid, {rn('options', 'subject'): msg})

    def _find_entities(self, item, kwargs):
        """Return a tuple holding the state named after the item's 'name' in
        the entity's current workflow, or an empty tuple if there is none.
        """
        assert 'name' in item # XXX else, complete_item
        state_name = item['name']
        wf = self.entity.cw_adapt_to('IWorkflowable').current_workflow
        state = wf.state_by_name(state_name)
        if state is None:
            return ()
        return (state,)


class CWEntityXMLActionLinkOrCreate(CWEntityXMLActionLink):
    """implementation of cubicweb entity xml parser's 'link-or-create' action

    requires a 'linkattr' option to control search of the linked entity.
    """
    __regid__ = 'cw.entityxml.action.link-or-create'
    create_when_not_found = True