# HG changeset patch
# User Sylvain Thénault <sylvain.thenault@logilab.fr>
# Date 1454494431 -3600
# Node ID df928a3a94e31435486f8d34a34a43f672cd5cbb
# Parent 78c8e64f3cef7a4a57ad789bf8e7efccf45e2a25
[dataimport] add a filter function to not fail if some extentity has several values for an attribute or inlined relation

The function will simply record a warning in the import log and keep one value arbitrarily.

Note that the function has to be explicitly inserted in the transformation stream.

diff -r 78c8e64f3cef -r df928a3a94e3 cubicweb/dataimport/importer.py
--- a/cubicweb/dataimport/importer.py Wed Feb 03 11:12:09 2016 +0100
+++ b/cubicweb/dataimport/importer.py Wed Feb 03 11:13:51 2016 +0100
@@ -1,4 +1,4 @@
-# copyright 2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# copyright 2015-2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
 # contact http://www.logilab.fr -- mailto:contact@logilab.fr
 #
 # This program is free software: you can redistribute it and/or modify it under
@@ -74,6 +74,37 @@
     return use_extid_as_cwuri_filter
 
 
+def drop_extra_values(extentities, schema, import_log):
+    """Return a generator of :class:`ExtEntity` objects that will ensure their attributes and
+    inlined relations have a single value. When it's not the case, a warning will be recorded in
+    the import log and one value among other will be kept (randomly).
+
+    `schema` is the instance's schema, `import_log` is an instance of a class implementing the
+    :class:`SimpleImportLog` interface.
+
+    Example usage:
+
+    .. code-block:: python
+
+        importer = ExtEntitiesImporter(schema, store, import_log)
+        importer.import_entities(drop_extra_values(extentities, schema, import_log))
+
+    """
+    _get_rschema = schema.rschema
+    for extentity in extentities:
+        entity_dict = extentity.values
+        for key, rtype, role in extentity.iter_rdefs():
+            rschema = _get_rschema(rtype)
+            if (rschema.final or (rschema.inlined and role == 'subject')) \
+               and len(entity_dict[key]) > 1:
+                values = ', '.join(repr(v) for v in entity_dict[key])
+                import_log.record_warning(
+                    "more than one value for attribute %r, only one will be kept: %s"
+                    % (rtype, values), path=extentity.extid)
+                entity_dict[key] = set([entity_dict[key].pop()])
+        yield extentity
+
+
 class RelationMapping(object):
     """Read-only mapping from relation type to set of related (subject, object) eids.
 
@@ -161,6 +192,8 @@
         Return a list of non inlined relations that may be inserted later, each relations
         defined by a 3-tuple (subject extid, relation type, object extid).
 
+        The instance's schema is given as argument.
+
         Take care the importer may call this method several times.
         """
         assert self._schema is None, 'prepare() has already been called for %s' % self
diff -r 78c8e64f3cef -r df928a3a94e3 cubicweb/dataimport/test/unittest_importer.py
--- a/cubicweb/dataimport/test/unittest_importer.py Wed Feb 03 11:12:09 2016 +0100
+++ b/cubicweb/dataimport/test/unittest_importer.py Wed Feb 03 11:13:51 2016 +0100
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# copyright 2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# copyright 2015-2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
 # contact http://www.logilab.fr -- mailto:contact@logilab.fr
 #
 # This program is free software: you can redistribute it and/or modify it under
@@ -16,15 +16,13 @@
 # with this program. If not, see <http://www.gnu.org/licenses/>.
 """Tests for cubicweb.dataimport.importer"""
 
-from collections import defaultdict
-
 from logilab.common.testlib import TestCase, unittest_main
 
 from cubicweb import ValidationError
 from cubicweb.devtools.testlib import CubicWebTC
 from cubicweb.dataimport import RQLObjectStore, ucsvreader
-from cubicweb.dataimport.importer import (ExtEntity, ExtEntitiesImporter, SimpleImportLog,
-                                          RelationMapping, use_extid_as_cwuri)
+from cubicweb.dataimport.importer import (ExtEntity, ExtEntitiesImporter, RelationMapping,
+                                          SimpleImportLog, use_extid_as_cwuri, drop_extra_values)
 
 
 class RelationMappingTC(CubicWebTC):
@@ -165,6 +163,21 @@
         self.assertNotIn('cwuri', personne.values)
 
 
+class DropExtraValuesTC(CubicWebTC):
+
+    def test(self):
+        personne = ExtEntity('Personne', b'1', {'nom': set([u'de la lune', 'di la luna']),
+                                                'prenom': set([u'Jean']),
+                                                'enfant': set('23'),
+                                                'connait': set('45')})
+        log = SimpleImportLog('<unspecified>')
+        list(drop_extra_values((personne,), self.schema, log))
+        self.assertEqual(len(personne.values['nom']), 1)
+        self.assertEqual(len(personne.values['enfant']), 1)
+        self.assertEqual(len(personne.values['connait']), 2)
+        self.assertEqual(len(log.logs), 2)
+
+
 def extentities_from_csv(fpath):
     """Yield ExtEntity read from `fpath` CSV file."""
     with open(fpath, 'rb') as f: