[dataimport] add a filter function to avoid failing when an extentity has several values for an attribute (final relation) or an inlined relation
The function simply records a warning in the import log and keeps a single,
arbitrarily chosen value.
Note that the function has to be explicitly inserted in the transformation stream.
--- a/cubicweb/dataimport/importer.py Wed Feb 03 11:12:09 2016 +0100
+++ b/cubicweb/dataimport/importer.py Wed Feb 03 11:13:51 2016 +0100
@@ -1,4 +1,4 @@
-# copyright 2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# copyright 2015-2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
@@ -74,6 +74,37 @@
return use_extid_as_cwuri_filter
+def drop_extra_values(extentities, schema, import_log):
+    """Return a generator of :class:`ExtEntity` objects that will ensure their attributes and
+    inlined relations have a single value.
+
+    When an attribute (final relation) or an inlined relation held as subject has more than one
+    value, a warning is recorded in the import log and a single value is kept. The kept value is
+    the first one once values are ordered by their `repr()`, so the choice is reproducible from
+    one run to the other as long as values have a stable `repr()`.
+
+    The set of values is replaced in `extentity.values`; the set object that was there before is
+    left untouched. Entities are yielded lazily, in input order.
+
+    `schema` is the instance's schema, `import_log` is an instance of a class implementing the
+    :class:`SimpleImportLog` interface.
+
+    Example usage:
+
+    .. code-block:: python
+
+        importer = ExtEntitiesImporter(schema, store, import_log)
+        importer.import_entities(drop_extra_values(extentities, schema, import_log))
+
+    """
+    _get_rschema = schema.rschema
+    for extentity in extentities:
+        entity_dict = extentity.values
+        for key, rtype, role in extentity.iter_rdefs():
+            rschema = _get_rschema(rtype)
+            # Only attributes and inlined relations seen from their subject are single-valued.
+            if not (rschema.final or (rschema.inlined and role == 'subject')):
+                continue
+            candidates = entity_dict[key]
+            if len(candidates) <= 1:
+                continue
+            # Order on repr() rather than on the values themselves: it works whatever the value
+            # types are and makes both the warning and the kept value independent from the set's
+            # iteration order.
+            ordered = sorted(candidates, key=repr)
+            values = ', '.join(repr(v) for v in ordered)
+            import_log.record_warning(
+                "more than one value for attribute %r, only one will be kept: %s"
+                % (rtype, values), path=extentity.extid)
+            # Bind a new set instead of pop()-ing from the existing one, which the caller may
+            # still reference elsewhere.
+            entity_dict[key] = set([ordered[0]])
+        yield extentity
+
+
class RelationMapping(object):
"""Read-only mapping from relation type to set of related (subject, object) eids.
@@ -161,6 +192,8 @@
Return a list of non inlined relations that may be inserted later, each relations defined by
a 3-tuple (subject extid, relation type, object extid).
+ The instance's schema is given as argument.
+
Take care the importer may call this method several times.
"""
assert self._schema is None, 'prepare() has already been called for %s' % self
--- a/cubicweb/dataimport/test/unittest_importer.py Wed Feb 03 11:12:09 2016 +0100
+++ b/cubicweb/dataimport/test/unittest_importer.py Wed Feb 03 11:13:51 2016 +0100
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# copyright 2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# copyright 2015-2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr -- mailto:contact@logilab.fr
#
# This program is free software: you can redistribute it and/or modify it under
@@ -16,15 +16,13 @@
# with this program. If not, see <http://www.gnu.org/licenses/>.
"""Tests for cubicweb.dataimport.importer"""
-from collections import defaultdict
-
from logilab.common.testlib import TestCase, unittest_main
from cubicweb import ValidationError
from cubicweb.devtools.testlib import CubicWebTC
from cubicweb.dataimport import RQLObjectStore, ucsvreader
-from cubicweb.dataimport.importer import (ExtEntity, ExtEntitiesImporter, SimpleImportLog,
- RelationMapping, use_extid_as_cwuri)
+from cubicweb.dataimport.importer import (ExtEntity, ExtEntitiesImporter, RelationMapping,
+ SimpleImportLog, use_extid_as_cwuri, drop_extra_values)
class RelationMappingTC(CubicWebTC):
@@ -165,6 +163,21 @@
self.assertNotIn('cwuri', personne.values)
+class DropExtraValuesTC(CubicWebTC):
+    """Check :func:`drop_extra_values` only trims attributes and inlined relations."""
+
+    def test(self):
+        """Extra values of `nom` and `enfant` are dropped with one warning each, while `prenom`
+        and `connait` are left as they are.
+        """
+        personne = ExtEntity('Personne', b'1', {'nom': set([u'de la lune', u'di la luna']),
+                                                'prenom': set([u'Jean']),
+                                                'enfant': set(['2', '3']),
+                                                'connait': set(['4', '5'])})
+        # Snapshot the values before filtering, since the entity is checked in place afterwards.
+        before = dict((key, set(values)) for key, values in personne.values.items())
+        log = SimpleImportLog('<unspecified>')
+        filtered = list(drop_extra_values((personne,), self.schema, log))
+        self.assertEqual(len(filtered), 1)
+        self.assertIs(filtered[0], personne)
+        for key in ('nom', 'enfant'):
+            # Which value survives is unspecified, only require it to be one of the originals.
+            self.assertEqual(len(personne.values[key]), 1)
+            self.assertTrue(personne.values[key] <= before[key])
+        for key in ('prenom', 'connait'):
+            self.assertEqual(personne.values[key], before[key])
+        # One warning per trimmed relation.
+        self.assertEqual(len(log.logs), 2)
+
+
def extentities_from_csv(fpath):
"""Yield ExtEntity read from `fpath` CSV file."""
with open(fpath, 'rb') as f: