sobjects/parsers.py
branch stable
changeset 7354 f627ab500fda
parent 7351 ed66f236715d
child 7378 86a1ae289f05
child 7480 97a1768bbbaa
--- a/sobjects/parsers.py	Tue May 10 16:15:43 2011 +0200
+++ b/sobjects/parsers.py	Mon May 02 20:41:46 2011 +0200
@@ -15,7 +15,21 @@
 #
 # You should have received a copy of the GNU Lesser General Public License along
 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
-"""datafeed parser for xml generated by cubicweb"""
+"""datafeed parser for xml generated by cubicweb
+
+Example of mapping for CWEntityXMLParser::
+
+  {u'CWUser': {                                        # EntityType
+      (u'in_group', u'subject', u'link'): [            # (rtype, role, action)
+          (u'CWGroup', {u'linkattr': u'name'})],       #   -> rules = [(EntityType, options), ...]
+      (u'tags', u'object', u'link-or-create'): [       # (...)
+          (u'Tag', {u'linkattr': u'name'})],           #   -> ...
+      (u'use_email', u'subject', u'copy'): [           # (...)
+          (u'EmailAddress', {})]                       #   -> ...
+      }
+   }
+
+"""
 
 import urllib2
 import StringIO
@@ -34,11 +48,12 @@
 from cubicweb import ValidationError, typed_eid
 from cubicweb.server.sources import datafeed
 
-def ensure_str_keys(dict):
-    for key in dict:
-        dict[str(key)] = dict.pop(key)
+def ensure_str_keys(dic):
+    for key in list(dic):  # iterate over a snapshot: the body pops and re-inserts keys in place
+        dic[str(key)] = dic.pop(key)
 
-# see cubicweb.web.views.xmlrss.SERIALIZERS
+# XXX see cubicweb.cwvreg.YAMS_TO_PY
+# XXX see cubicweb.web.views.xmlrss.SERIALIZERS
 DEFAULT_CONVERTERS = BASE_CONVERTERS.copy()
 DEFAULT_CONVERTERS['String'] = unicode
 DEFAULT_CONVERTERS['Password'] = lambda x: x.encode('utf8')
@@ -76,7 +91,7 @@
             typeddict[rschema.type] = converters[attrtype](stringdict[rschema])
     return typeddict
 
-def _entity_etree(parent):
+def _parse_entity_etree(parent):
     for node in list(parent):
         try:
             item = {'cwtype': unicode(node.tag),
@@ -92,20 +107,18 @@
         rels = {}
         for child in node:
             role = child.get('role')
-            if child.get('role'):
+            if role:
                 # relation
                 related = rels.setdefault(role, {}).setdefault(child.tag, [])
-                related += [ritem for ritem, _ in _entity_etree(child)]
+                related += [ritem for ritem, _ in _parse_entity_etree(child)]
             else:
                 # attribute
                 item[child.tag] = unicode(child.text)
         yield item, rels
 
 def build_search_rql(etype, attrs):
-    restrictions = []
-    for attr in attrs:
-        restrictions.append('X %(attr)s %%(%(attr)s)s' % {'attr': attr})
-    return 'Any X WHERE X is %s, %s' % (etype, ','.join(restrictions))
+    restrictions = ['X %(attr)s %%(%(attr)s)s' % {'attr': attr} for attr in attrs]
+    return 'Any X WHERE %s' % ', '.join(['X is %s' % etype] + restrictions)  # no dangling ', ' when attrs is empty
 
 def rtype_role_rql(rtype, role):
     if role == 'object':
@@ -121,7 +134,7 @@
 
 def _check_linkattr_option(action, options, eid, _):
     if not 'linkattr' in options:
-        msg = _("'%s' action require 'linkattr' option") % action
+        msg = _("'%s' action requires 'linkattr' option") % action  # `_` is the callable received as 4th parameter (presumably a translation function)
         raise ValidationError(eid, {rn('options', 'subject'): msg})
 
 
@@ -230,19 +243,12 @@
                     break
             self.source.info('GET %s', url)
             stream = _OPENER.open(url)
-        return _entity_etree(etree.parse(stream).getroot())
-
-    def process_one(self, url):
-        # XXX assert len(root.children) == 1
-        for item, rels in self.parse(url):
-            return self.process_item(item, rels)
+        return _parse_entity_etree(etree.parse(stream).getroot())
 
     def process_item(self, item, rels):
-        entity = self.extid2entity(str(item.pop('cwuri')),
-                                   item.pop('cwtype'),
+        entity = self.extid2entity(str(item.pop('cwuri')),  item.pop('cwtype'),
                                    item=item)
-        if not (self.created_during_pull(entity)
-                or self.updated_during_pull(entity)):
+        if not (self.created_during_pull(entity) or self.updated_during_pull(entity)):
             self.notify_updated(entity)
             item.pop('eid')
             # XXX check modification date
@@ -250,16 +256,16 @@
             entity.set_attributes(**attrs)
         for (rtype, role, action), rules in self.source.mapping.get(entity.__regid__, {}).iteritems():
             try:
-                rel = rels[role][rtype]
+                related_items = rels[role][rtype]
             except KeyError:
-                self.source.error('relation %s-%s doesn\'t seem exported in %s xml',
+                self.source.error('relation %s-%s not found in xml export of %s',
                                   rtype, role, entity.__regid__)
                 continue
             try:
                 actionmethod = self.action_methods[action]
             except KeyError:
                 raise Exception('Unknown action %s' % action)
-            actionmethod(entity, rtype, role, rel, rules)
+            actionmethod(entity, rtype, role, related_items, rules)
         return entity
 
     def before_entity_copy(self, entity, sourceparams):
@@ -267,89 +273,89 @@
         attrs = extract_typed_attrs(entity.e_schema, sourceparams['item'])
         entity.cw_edited.update(attrs)
 
-    def related_copy(self, entity, rtype, role, value, rules):
+    def related_copy(self, entity, rtype, role, others, rules):
         """implementation of 'copy' action
 
         Takes no option.
         """
         assert not any(x[1] for x in rules), "'copy' action takes no option"
         ttypes = set([x[0] for x in rules])
-        value = [item for item in value if item['cwtype'] in ttypes]
+        others = [item for item in others if item['cwtype'] in ttypes]
         eids = [] # local eids
-        if not value:
+        if not others:
             self._clear_relation(entity, rtype, role, ttypes)
             return
-        for item in value:
-            eids.append(self.process_one(self._complete_url(item)).eid)
+        for item in others:
+            item, rels = self._complete_item(item)  # full remote xml of the item, mapped relations included
+            other_entity = self.process_item(item, rels)  # process_item indexes rels[role][rtype]: a list would raise TypeError
+            eids.append(other_entity.eid)
         self._set_relation(entity, rtype, role, eids)
 
-    def related_link(self, entity, rtype, role, value, rules):
+    def related_link(self, entity, rtype, role, others, rules):
         """implementation of 'link' action
 
         requires an options to control search of the linked entity.
         """
         for ttype, options in rules:
             assert 'linkattr' in options, (
-                "'link-or-create' action require a list of attributes used to "
+                "'link' action requires a list of attributes used to "
                 "search if the entity already exists")
-            self._related_link(entity, rtype, role, ttype, value, [options['linkattr']],
-                               self._log_not_found)
+            self._related_link(entity, rtype, role, ttype, others, [options['linkattr']],
+                               create_when_not_found=False)  # unmatched targets are only logged, never created
 
-    def related_link_or_create(self, entity, rtype, role, value, rules):
+    def related_link_or_create(self, entity, rtype, role, others, rules):
         """implementation of 'link-or-create' action
 
         requires an options to control search of the linked entity.
         """
         for ttype, options in rules:
             assert 'linkattr' in options, (
-                "'link-or-create' action require a list of attributes used to "
+                "'link-or-create' action requires a list of attributes used to "
                 "search if the entity already exists")
-            self._related_link(entity, rtype, role, ttype, value, [options['linkattr']],
-                               self._create_not_found)
-
-    def _log_not_found(self, entity, rtype, role, ritem, searchvalues):
-        self.source.error('can find %s entity with attributes %s',
-                          ritem['cwtype'], searchvalues)
+            self._related_link(entity, rtype, role, ttype, others, [options['linkattr']],
+                               create_when_not_found=True)  # unmatched targets are created from the link attribute value
 
-    def _create_not_found(self, entity, rtype, role, ritem, searchvalues):
-        ensure_str_keys(searchvalues) # XXX necessary with python < 2.6
-        return self._cw.create_entity(ritem['cwtype'], **searchvalues).eid
-
-    def _related_link(self, entity, rtype, role, ttype, value, searchattrs,
-                      notfound_callback):
+    def _related_link(self, entity, rtype, role, ttype, others, searchattrs,
+                      create_when_not_found):
+        def issubset(x, y):  # true when every element of x is found in y
+            return all(z in y for z in x)
         eids = [] # local eids
-        for item in value:
+        for item in others:
             if item['cwtype'] != ttype:
                 continue
-            if not all(attr in item for attr in searchattrs):
-                # need to fetch related entity's xml
-                ritems = list(self.parse(self._complete_url(item, False)))
-                assert len(ritems) == 1, 'unexpected xml'
-                ritem = ritems[0][0] # list of 2-uples
-                assert all(attr in ritem for attr in searchattrs), \
-                       'missing attribute, got %s expected keys %s' % (item, searchattrs)
-            else:
-                ritem = item
-            kwargs = dict((attr, ritem[attr]) for attr in searchattrs)
+            if not issubset(searchattrs, item):
+                item, _rels = self._complete_item(item, False)  # search attributes missing: fetch the item's own xml, without relations
+                if not issubset(searchattrs, item):
+                    self.source.error('missing attribute, got %s expected keys %s',
+                                      item, searchattrs)
+                    continue
+            kwargs = dict((attr, item[attr]) for attr in searchattrs)
             rql = build_search_rql(item['cwtype'], kwargs)
             rset = self._cw.execute(rql, kwargs)
-            if rset:
-                assert len(rset) == 1
+            if len(rset) > 1:
+                self.source.error('ambiguous link: found %s entity %s with attributes %s',
+                                  len(rset), item['cwtype'], kwargs)
+            elif len(rset) == 1:
                 eids.append(rset[0][0])
+            elif create_when_not_found:
+                ensure_str_keys(kwargs) # XXX necessary with python < 2.6
+                eids.append(self._cw.create_entity(item['cwtype'], **kwargs).eid)
             else:
-                eid = notfound_callback(entity, rtype, role, ritem, kwargs)
-                if eid is not None:
-                    eids.append(eid)
+                self.source.error('can not find %s entity with attributes %s',
+                                  item['cwtype'], kwargs)
         if not eids:
             self._clear_relation(entity, rtype, role, (ttype,))
         else:
             self._set_relation(entity, rtype, role, eids)
 
-    def _complete_url(self, item, add_relations=True):
+    def _complete_item(self, item, add_relations=True):
         itemurl = item['cwuri'] + '?vid=xml'
-        for rtype, role, _ in self.source.mapping.get(item['cwtype'], ()):
-            itemurl += '&relation=%s_%s' % (rtype, role)
-        return itemurl
+        if add_relations:
+            for rtype, role, _ in self.source.mapping.get(item['cwtype'], ()):
+                itemurl += '&relation=%s_%s' % (rtype, role)
+        item_rels = list(self.parse(itemurl))
+        assert len(item_rels) == 1, 'unexpected xml for %s' % itemurl
+        return item_rels[0]  # the single (item, rels) pair parsed from itemurl
 
     def _clear_relation(self, entity, rtype, role, ttypes):
         if entity.eid not in self.stats['created']: