sobjects/parsers.py
changeset 7920 5146c63e8e34
parent 7916 54e240c1b419
child 7932 2ad26cc3b5c6
equal deleted inserted replaced
7919:ae3307cac44e 7920:5146c63e8e34
   191                 'cw.entityxml.item-builder', self._cw, node=node,
   191                 'cw.entityxml.item-builder', self._cw, node=node,
   192                 parser=self)
   192                 parser=self)
   193             yield builder.build_item()
   193             yield builder.build_item()
   194 
   194 
   195     def process_item(self, item, rels):
   195     def process_item(self, item, rels):
       
   196         """
       
   197         item and rels are what's returned by the item builder `build_item` method:
       
   198 
       
   199         * `item` is an {attribute: value} dictionary
       
   200         * `rels` is for relations and structured as
       
   201            {role: {relation: [(related item, related rels)...]}}
       
   202         """
   196         entity = self.extid2entity(str(item.pop('cwuri')),  item.pop('cwtype'),
   203         entity = self.extid2entity(str(item.pop('cwuri')),  item.pop('cwtype'),
   197                                    cwsource=item.pop('cwsource'), item=item)
   204                                    cwsource=item.pop('cwsource'), item=item)
   198         if entity is None:
   205         if entity is None:
   199             return None
   206             return None
   200         if entity.eid in self._processed_entities:
   207         if entity.eid in self._processed_entities:
   204             self.notify_updated(entity)
   211             self.notify_updated(entity)
   205             item.pop('eid')
   212             item.pop('eid')
   206             # XXX check modification date
   213             # XXX check modification date
   207             attrs = extract_typed_attrs(entity.e_schema, item)
   214             attrs = extract_typed_attrs(entity.e_schema, item)
   208             entity.set_attributes(**attrs)
   215             entity.set_attributes(**attrs)
   209         for (rtype, role, action), rules in self.source.mapping.get(entity.__regid__, {}).iteritems():
   216         self.process_relations(entity, rels)
       
   217         return entity
       
   218 
       
   219     def process_relations(self, entity, rels):
       
   220         etype = entity.__regid__
       
   221         for (rtype, role, action), rules in self.source.mapping.get(etype, {}).iteritems():
   210             try:
   222             try:
   211                 related_items = rels[role][rtype]
   223                 related_items = rels[role][rtype]
   212             except KeyError:
   224             except KeyError:
   213                 self.source.error('relation %s-%s not found in xml export of %s',
   225                 self.source.error('relation %s-%s not found in xml export of %s',
   214                                   rtype, role, entity.__regid__)
   226                                   rtype, role, etype)
   215                 continue
   227                 continue
   216             try:
   228             try:
   217                 linker = self.select_linker(action, rtype, role, entity)
   229                 linker = self.select_linker(action, rtype, role, entity)
   218             except RegistryException:
   230             except RegistryException:
   219                 self.source.error('no linker for action %s', action)
   231                 self.source.error('no linker for action %s', action)
   220             else:
   232             else:
   221                 linker.link_items(related_items, rules)
   233                 linker.link_items(related_items, rules)
   222         return entity
       
   223 
   234 
   224     def before_entity_copy(self, entity, sourceparams):
   235     def before_entity_copy(self, entity, sourceparams):
   225         """IDataFeedParser callback"""
   236         """IDataFeedParser callback"""
   226         attrs = extract_typed_attrs(entity.e_schema, sourceparams['item'])
   237         attrs = extract_typed_attrs(entity.e_schema, sourceparams['item'])
   227         entity.cw_edited.update(attrs)
   238         entity.cw_edited.update(attrs)
   228 
   239 
   229     def complete_url(self, url, etype=None, add_relations=True):
   240     def complete_url(self, url, etype=None, known_relations=None):
   230         """append to the url's query string information about relations that should
   241         """append to the url's query string information about relations that should
   231         be included in the resulting xml, according to source mapping.
   242         be included in the resulting xml, according to source mapping.
   232 
   243 
   233         If etype is not specified, try to guess it using the last path part of
   244         If etype is not specified, try to guess it using the last path part of
   234         the url, i.e. the format used by default in cubicweb to map all entities
   245         the url, i.e. the format used by default in cubicweb to map all entities
   235         of a given type as in 'http://mysite.org/EntityType'.
   246         of a given type as in 'http://mysite.org/EntityType'.
       
   247 
       
   248         If `known_relations` is given, it should be a dictionary of already
       
   249         known relations, so they don't get queried again.
   236         """
   250         """
   237         try:
   251         try:
   238             url, qs = url.split('?', 1)
   252             url, qs = url.split('?', 1)
   239         except ValueError:
   253         except ValueError:
   240             qs = ''
   254             qs = ''
   248                 return url + '?' + self._cw.build_url_params(**params)
   262                 return url + '?' + self._cw.build_url_params(**params)
   249             try:
   263             try:
   250                 etype = self._cw.vreg.case_insensitive_etypes[etype.lower()]
   264                 etype = self._cw.vreg.case_insensitive_etypes[etype.lower()]
   251             except KeyError:
   265             except KeyError:
   252                 return url + '?' + self._cw.build_url_params(**params)
   266                 return url + '?' + self._cw.build_url_params(**params)
   253         if add_relations:
   267         relations = params.setdefault('relation', [])
   254             relations = params.setdefault('relation', [])
   268         for rtype, role, _ in self.source.mapping.get(etype, ()):
   255             for rtype, role, _ in self.source.mapping.get(etype, ()):
   269             if known_relations and rtype in known_relations.get('role', ()):
   256                 reldef = '%s-%s' % (rtype, role)
   270                 continue
   257                 if not reldef in relations:
   271             reldef = '%s-%s' % (rtype, role)
   258                     relations.append(reldef)
   272             if not reldef in relations:
       
   273                 relations.append(reldef)
   259         return url + '?' + self._cw.build_url_params(**params)
   274         return url + '?' + self._cw.build_url_params(**params)
   260 
   275 
   261     def complete_item(self, item, add_relations=True):
   276     def complete_item(self, item, rels):
   262         try:
   277         try:
   263             return self._parsed_urls[(item['cwuri'], add_relations)]
   278             return self._parsed_urls[item['cwuri']]
   264         except KeyError:
   279         except KeyError:
   265             itemurl = self.complete_url(item['cwuri'], item['cwtype'],
   280             itemurl = self.complete_url(item['cwuri'], item['cwtype'], rels)
   266                                         add_relations)
       
   267             item_rels = list(self.parse(itemurl))
   281             item_rels = list(self.parse(itemurl))
   268             assert len(item_rels) == 1, 'url %s expected to bring back one '\
   282             assert len(item_rels) == 1, 'url %s expected to bring back one '\
   269                    'and only one entity, got %s' % (itemurl, len(item_rels))
   283                    'and only one entity, got %s' % (itemurl, len(item_rels))
   270             self._parsed_urls[(item['cwuri'], add_relations)] = item_rels[0]
   284             self._parsed_urls[item['cwuri']] = item_rels[0]
       
   285             if rels:
       
   286                 # XXX (do it better) merge relations
       
   287                 new_rels = item_rels[0][1]
       
   288                 new_rels.get('subject', {}).update(rels.get('subject', {}))
       
   289                 new_rels.get('object', {}).update(rels.get('object', {}))
   271             return item_rels[0]
   290             return item_rels[0]
   272 
   291 
   273 
   292 
   274 class CWEntityXMLItemBuilder(Component):
   293 class CWEntityXMLItemBuilder(Component):
   275     __regid__ = 'cw.entityxml.item-builder'
   294     __regid__ = 'cw.entityxml.item-builder'
   278         super(CWEntityXMLItemBuilder, self).__init__(_cw, **kwargs)
   297         super(CWEntityXMLItemBuilder, self).__init__(_cw, **kwargs)
   279         self.parser = parser
   298         self.parser = parser
   280         self.node = node
   299         self.node = node
   281 
   300 
   282     def build_item(self):
   301     def build_item(self):
       
   302         """parse an XML document node and return two dictionaries defining (part
       
   303         of) an entity:
       
   304 
       
   305         - {attribute: value}
       
   306         - {role: {relation: [(related item, related rels)...]}}
       
   307         """
   283         node = self.node
   308         node = self.node
   284         item = dict(node.attrib.items())
   309         item = dict(node.attrib.items())
   285         item['cwtype'] = unicode(node.tag)
   310         item['cwtype'] = unicode(node.tag)
   286         item.setdefault('cwsource', None)
   311         item.setdefault('cwsource', None)
   287         try:
   312         try:
   294         for child in node:
   319         for child in node:
   295             role = child.get('role')
   320             role = child.get('role')
   296             if role:
   321             if role:
   297                 # relation
   322                 # relation
   298                 related = rels.setdefault(role, {}).setdefault(child.tag, [])
   323                 related = rels.setdefault(role, {}).setdefault(child.tag, [])
   299                 related += [ritem for ritem, _ in self.parser.parse_etree(child)]
   324                 related += self.parser.parse_etree(child)
   300             elif child.text:
   325             elif child.text:
   301                 # attribute
   326                 # attribute
   302                 item[child.tag] = unicode(child.text)
   327                 item[child.tag] = unicode(child.text)
   303             else:
   328             else:
   304                 # None attribute (empty tag)
   329                 # None attribute (empty tag)
   335 
   360 
   336     def link_items(self, others, rules):
   361     def link_items(self, others, rules):
   337         assert not any(x[1] for x in rules), "'copy' action takes no option"
   362         assert not any(x[1] for x in rules), "'copy' action takes no option"
   338         ttypes = frozenset([x[0] for x in rules])
   363         ttypes = frozenset([x[0] for x in rules])
   339         eids = [] # local eids
   364         eids = [] # local eids
   340         for item in others:
   365         for item, rels in others:
   341             if item['cwtype'] in ttypes:
   366             if item['cwtype'] in ttypes:
   342                 item = self.parser.complete_item(item)[0]
   367                 item, rels = self.parser.complete_item(item, rels)
   343                 other_entity = self.parser.process_item(item, [])
   368                 other_entity = self.parser.process_item(item, rels)
   344                 if other_entity is not None:
   369                 if other_entity is not None:
   345                     eids.append(other_entity.eid)
   370                     eids.append(other_entity.eid)
   346         if eids:
   371         if eids:
   347             self._set_relation(eids)
   372             self._set_relation(eids)
   348         else:
   373         else:
   393     def _related_link(self, ttype, others, searchattrs):
   418     def _related_link(self, ttype, others, searchattrs):
   394         def issubset(x,y):
   419         def issubset(x,y):
   395             return all(z in y for z in x)
   420             return all(z in y for z in x)
   396         eids = [] # local eids
   421         eids = [] # local eids
   397         source = self.parser.source
   422         source = self.parser.source
   398         for item in others:
   423         for item, rels in others:
   399             if item['cwtype'] != ttype:
   424             if item['cwtype'] != ttype:
   400                 continue
   425                 continue
   401             if not issubset(searchattrs, item):
   426             if not issubset(searchattrs, item):
   402                 item = self.parser.complete_item(item, False)[0]
   427                 item, rels = self.parser.complete_item(item, rels)
   403                 if not issubset(searchattrs, item):
   428                 if not issubset(searchattrs, item):
   404                     source.error('missing attribute, got %s expected keys %s',
   429                     source.error('missing attribute, got %s expected keys %s',
   405                                  item, searchattrs)
   430                                  item, searchattrs)
   406                     continue
   431                     continue
   407             # XXX str() needed with python < 2.6
   432             # XXX str() needed with python < 2.6
   408             kwargs = dict((str(attr), item[attr]) for attr in searchattrs)
   433             kwargs = dict((str(attr), item[attr]) for attr in searchattrs)
   409             targets = self._find_entities(item, kwargs)
   434             targets = self._find_entities(item, kwargs)
   410             if len(targets) > 1:
   435             if len(targets) == 1:
   411                 source.error('ambiguous link: found %s entity %s with attributes %s',
   436                 entity = targets[0]
   412                              len(targets), item['cwtype'], kwargs)
   437             elif not targets and self.create_when_not_found:
   413             elif len(targets) == 1:
   438                 entity = self._cw.create_entity(item['cwtype'], **kwargs)
   414                 eids.append(targets[0].eid)
       
   415             elif self.create_when_not_found:
       
   416                 eids.append(self._cw.create_entity(item['cwtype'], **kwargs).eid)
       
   417             else:
   439             else:
   418                 source.error('can not find %s entity with attributes %s',
   440                 if len(targets) > 1:
   419                              item['cwtype'], kwargs)
   441                     source.error('ambiguous link: found %s entity %s with attributes %s',
       
   442                                  len(targets), item['cwtype'], kwargs)
       
   443                 else:
       
   444                     source.error('can not find %s entity with attributes %s',
       
   445                                  item['cwtype'], kwargs)
       
   446                 continue
       
   447             eids.append(entity.eid)
       
   448             self.parser.process_relations(entity, rels)
   420         if eids:
   449         if eids:
   421             self._set_relation(eids)
   450             self._set_relation(eids)
   422         else:
   451         else:
   423             self._clear_relation((ttype,))
   452             self._clear_relation((ttype,))
   424 
   453