[datafeed parser] properly recurse on nodes to follow all mapped relations (closes #1988432) stable
authorSylvain Thénault <sylvain.thenault@logilab.fr>
Fri, 07 Oct 2011 11:47:09 +0200
branchstable
changeset 7916 54e240c1b419
parent 7915 a7f3245e1728
child 7917 436400e7f807
[datafeed parser] properly recurse on nodes to follow all mapped relations (closes #1988432)
sobjects/parsers.py
sobjects/test/data/schema.py
sobjects/test/unittest_parsers.py
--- a/sobjects/parsers.py	Thu Oct 06 16:15:16 2011 +0200
+++ b/sobjects/parsers.py	Fri Oct 07 11:47:09 2011 +0200
@@ -193,6 +193,13 @@
             yield builder.build_item()
 
     def process_item(self, item, rels):
+        """
+        item and rels are what's returned by the item builder `build_item` method:
+
+        * `item` is an {attribute: value} dictionary
+        * `rels` is for relations and structured as
+           {role: {relation: [(related item, related rels)...]}
+        """
         entity = self.extid2entity(str(item.pop('cwuri')),  item.pop('cwtype'),
                                    cwsource=item.pop('cwsource'), item=item)
         if entity is None:
@@ -206,12 +213,17 @@
             # XXX check modification date
             attrs = extract_typed_attrs(entity.e_schema, item)
             entity.set_attributes(**attrs)
-        for (rtype, role, action), rules in self.source.mapping.get(entity.__regid__, {}).iteritems():
+        self.process_relations(entity, rels)
+        return entity
+
+    def process_relations(self, entity, rels):
+        etype = entity.__regid__
+        for (rtype, role, action), rules in self.source.mapping.get(etype, {}).iteritems():
             try:
                 related_items = rels[role][rtype]
             except KeyError:
                 self.source.error('relation %s-%s not found in xml export of %s',
-                                  rtype, role, entity.__regid__)
+                                  rtype, role, etype)
                 continue
             try:
                 linker = self.select_linker(action, rtype, role, entity)
@@ -219,20 +231,22 @@
                 self.source.error('no linker for action %s', action)
             else:
                 linker.link_items(related_items, rules)
-        return entity
 
     def before_entity_copy(self, entity, sourceparams):
         """IDataFeedParser callback"""
         attrs = extract_typed_attrs(entity.e_schema, sourceparams['item'])
         entity.cw_edited.update(attrs)
 
-    def complete_url(self, url, etype=None, add_relations=True):
+    def complete_url(self, url, etype=None, known_relations=None):
         """append to the url's query string information about relation that should
         be included in the resulting xml, according to source mapping.
 
         If etype is not specified, try to guess it using the last path part of
         the url, i.e. the format used by default in cubicweb to map all entities
         of a given type as in 'http://mysite.org/EntityType'.
+
+        If `known_relations` is given, it should be a dictionary of already
+        known relations, so they don't get queried again.
         """
         try:
             url, qs = url.split('?', 1)
@@ -250,24 +264,29 @@
                 etype = self._cw.vreg.case_insensitive_etypes[etype.lower()]
             except KeyError:
                 return url + '?' + self._cw.build_url_params(**params)
-        if add_relations:
-            relations = params.setdefault('relation', [])
-            for rtype, role, _ in self.source.mapping.get(etype, ()):
-                reldef = '%s-%s' % (rtype, role)
-                if not reldef in relations:
-                    relations.append(reldef)
+        relations = params.setdefault('relation', [])
+        for rtype, role, _ in self.source.mapping.get(etype, ()):
+            if known_relations and rtype in known_relations.get('role', ()):
+                continue
+            reldef = '%s-%s' % (rtype, role)
+            if not reldef in relations:
+                relations.append(reldef)
         return url + '?' + self._cw.build_url_params(**params)
 
-    def complete_item(self, item, add_relations=True):
+    def complete_item(self, item, rels):
         try:
-            return self._parsed_urls[(item['cwuri'], add_relations)]
+            return self._parsed_urls[item['cwuri']]
         except KeyError:
-            itemurl = self.complete_url(item['cwuri'], item['cwtype'],
-                                        add_relations)
+            itemurl = self.complete_url(item['cwuri'], item['cwtype'], rels)
             item_rels = list(self.parse(itemurl))
             assert len(item_rels) == 1, 'url %s expected to bring back one '\
                    'and only one entity, got %s' % (itemurl, len(item_rels))
-            self._parsed_urls[(item['cwuri'], add_relations)] = item_rels[0]
+            self._parsed_urls[item['cwuri']] = item_rels[0]
+            if rels:
+                # XXX (do it better) merge relations
+                new_rels = item_rels[0][1]
+                new_rels.get('subject', {}).update(rels.get('subject', {}))
+                new_rels.get('object', {}).update(rels.get('object', {}))
             return item_rels[0]
 
 
@@ -280,6 +299,12 @@
         self.node = node
 
     def build_item(self):
+        """parse a XML document node and return two dictionaries defining (part
+        of) an entity:
+
+        - {attribute: value}
+        - {role: {relation: [(related item, related rels)...]}
+        """
         node = self.node
         item = dict(node.attrib.items())
         item['cwtype'] = unicode(node.tag)
@@ -296,7 +321,7 @@
             if role:
                 # relation
                 related = rels.setdefault(role, {}).setdefault(child.tag, [])
-                related += [ritem for ritem, _ in self.parser.parse_etree(child)]
+                related += self.parser.parse_etree(child)
             elif child.text:
                 # attribute
                 item[child.tag] = unicode(child.text)
@@ -337,10 +362,10 @@
         assert not any(x[1] for x in rules), "'copy' action takes no option"
         ttypes = frozenset([x[0] for x in rules])
         eids = [] # local eids
-        for item in others:
+        for item, rels in others:
             if item['cwtype'] in ttypes:
-                item = self.parser.complete_item(item)[0]
-                other_entity = self.parser.process_item(item, [])
+                item, rels = self.parser.complete_item(item, rels)
+                other_entity = self.parser.process_item(item, rels)
                 if other_entity is not None:
                     eids.append(other_entity.eid)
         if eids:
@@ -395,11 +420,11 @@
             return all(z in y for z in x)
         eids = [] # local eids
         source = self.parser.source
-        for item in others:
+        for item, rels in others:
             if item['cwtype'] != ttype:
                 continue
             if not issubset(searchattrs, item):
-                item = self.parser.complete_item(item, False)[0]
+                item, rels = self.parser.complete_item(item, rels)
                 if not issubset(searchattrs, item):
                     source.error('missing attribute, got %s expected keys %s',
                                  item, searchattrs)
@@ -407,16 +432,20 @@
             # XXX str() needed with python < 2.6
             kwargs = dict((str(attr), item[attr]) for attr in searchattrs)
             targets = self._find_entities(item, kwargs)
-            if len(targets) > 1:
-                source.error('ambiguous link: found %s entity %s with attributes %s',
-                             len(targets), item['cwtype'], kwargs)
-            elif len(targets) == 1:
-                eids.append(targets[0].eid)
-            elif self.create_when_not_found:
-                eids.append(self._cw.create_entity(item['cwtype'], **kwargs).eid)
+            if len(targets) == 1:
+                entity = targets[0]
+            elif not targets and self.create_when_not_found:
+                entity = self._cw.create_entity(item['cwtype'], **kwargs)
             else:
-                source.error('can not find %s entity with attributes %s',
-                             item['cwtype'], kwargs)
+                if len(targets) > 1:
+                    source.error('ambiguous link: found %s entity %s with attributes %s',
+                                 len(targets), item['cwtype'], kwargs)
+                else:
+                    source.error('can not find %s entity with attributes %s',
+                                 item['cwtype'], kwargs)
+                continue
+            eids.append(entity.eid)
+            self.parser.process_relations(entity, rels)
         if eids:
             self._set_relation(eids)
         else:
--- a/sobjects/test/data/schema.py	Thu Oct 06 16:15:16 2011 +0200
+++ b/sobjects/test/data/schema.py	Fri Oct 07 11:47:09 2011 +0200
@@ -25,4 +25,4 @@
 
 class Tag(EntityType):
     name = String(unique=True)
-    tags = SubjectRelation('CWUser')
+    tags = SubjectRelation(('CWUser', 'CWGroup', 'EmailAddress'))
--- a/sobjects/test/unittest_parsers.py	Thu Oct 06 16:15:16 2011 +0200
+++ b/sobjects/test/unittest_parsers.py	Fri Oct 07 11:47:09 2011 +0200
@@ -71,6 +71,9 @@
   <address>syt@logilab.fr</address>
   <modification_date>2010-04-13 14:35:56</modification_date>
   <creation_date>2010-04-13 14:35:56</creation_date>
+  <tags role="object">
+    <Tag cwuri="http://pouet.org/9" eid="9"/>
+  </tags>
  </EmailAddress>
 </rset>
 ''',
@@ -78,6 +81,9 @@
 <rset size="1">
  <CWGroup eid="7" cwuri="http://pouet.org/7">
   <name>users</name>
+  <tags role="object">
+    <Tag cwuri="http://pouet.org/9" eid="9"/>
+  </tags>
  </CWGroup>
 </rset>
 ''',
@@ -140,7 +146,7 @@
                               u'role=subject\naction=link\nlinkattr=name'),
                              (('CWUser', 'in_state', '*'),
                               u'role=subject\naction=link\nlinkattr=name'),
-                             (('*', 'tags', 'CWUser'),
+                             (('*', 'tags', '*'),
                               u'role=object\naction=link-or-create\nlinkattr=name'),
                             ])
         myotherfeed.init_mapping([(('CWUser', 'in_group', '*'),
@@ -177,7 +183,15 @@
                                  (u'Tag', {u'linkattr': u'name'})],
                              (u'use_email', u'subject', u'copy'): [
                                  (u'EmailAddress', {})]
-                             }
+                             },
+                          u'CWGroup': {
+                             (u'tags', u'object', u'link-or-create'): [
+                                 (u'Tag', {u'linkattr': u'name'})],
+                             },
+                          u'EmailAddress': {
+                             (u'tags', u'object', u'link-or-create'): [
+                                 (u'Tag', {u'linkattr': u'name'})],
+                             },
                           })
         session = self.repo.internal_session(safe=True)
         stats = dfsource.pull_data(session, force=True, raise_on_error=True)
@@ -198,17 +212,21 @@
         self.assertEqual(email.cwuri, 'http://pouet.org/6')
         self.assertEqual(email.absolute_url(), 'http://pouet.org/6')
         self.assertEqual(email.cw_source[0].name, 'myfeed')
+        self.assertEqual(len(email.reverse_tags), 1)
+        self.assertEqual(email.reverse_tags[0].name, 'hop')
         # link action
         self.assertFalse(self.execute('CWGroup X WHERE X name "unknown"'))
         groups = sorted([g.name for g in user.in_group])
         self.assertEqual(groups, ['users'])
+        group = user.in_group[0]
+        self.assertEqual(len(group.reverse_tags), 1)
+        self.assertEqual(group.reverse_tags[0].name, 'hop')
         # link or create action
-        tags = sorted([t.name for t in user.reverse_tags])
-        self.assertEqual(tags, ['hop', 'unknown'])
-        tag = self.execute('Tag X WHERE X name "unknown"').get_entity(0, 0)
-        self.assertEqual(tag.cwuri, 'http://testing.fr/cubicweb/%s' % tag.eid)
-        self.assertEqual(tag.cw_source[0].name, 'system')
-
+        tags = set([(t.name, t.cwuri.replace(str(t.eid), ''), t.cw_source[0].name)
+                    for t in user.reverse_tags])
+        self.assertEqual(tags, set((('hop', 'http://testing.fr/cubicweb/', 'system'),
+                                    ('unknown', 'http://testing.fr/cubicweb/', 'system')))
+                         )
         session.set_cnxset()
         stats = dfsource.pull_data(session, force=True, raise_on_error=True)
         self.assertEqual(stats['created'], set())