[dataimport] order of ExtEntities should be irrelevant (closes #13117472)
author Nicolas Chauvat <nicolas.chauvat@logilab.fr>
Wed, 04 May 2016 23:52:36 +0200
changeset 11275 814f54d6183b
parent 11274 d0f6fe008ec4
child 11278 19fcce6dc6d1
[dataimport] order of ExtEntities should be irrelevant (closes #13117472)
dataimport/importer.py
dataimport/test/unittest_importer.py
--- a/dataimport/importer.py	Wed Jun 01 15:36:18 2016 +0200
+++ b/dataimport/importer.py	Wed May 04 23:52:36 2016 +0200
@@ -286,6 +286,7 @@
         """
         schema = self.schema
         extid2eid = self.extid2eid
+        order_hint = list(self.etypes_order_hint)
         for ext_entity in ext_entities:
             # check data in the transitional representation and prepare it for
             # later insertion in the database
@@ -295,12 +296,17 @@
                 queue.setdefault(ext_entity.etype, []).append(ext_entity)
                 continue
             yield ext_entity
+            if not queue:
+                continue
             # check for some entities in the queue that may now be ready. We'll have to restart
             # search for ready entities until no one is generated
+            for etype in queue:
+                if etype not in order_hint:
+                    order_hint.append(etype)
             new = True
             while new:
                 new = False
-                for etype in self.etypes_order_hint:
+                for etype in order_hint:
                     if etype in queue:
                         new_queue = []
                         for ext_entity in queue[etype]:
@@ -344,8 +350,8 @@
                 try:
                     subject_eid = extid2eid[subject_uri]
                     object_eid = extid2eid[object_uri]
-                except KeyError:
-                    missing_relations.append((subject_uri, rtype, object_uri))
+                except KeyError as exc:
+                    missing_relations.append((subject_uri, rtype, object_uri, exc))
                     continue
                 if (subject_eid, object_eid) not in existing:
                     prepare_insert_relation(subject_eid, rtype, object_eid)
@@ -367,8 +373,9 @@
                 raise Exception('\n'.join(msgs))
         if missing_relations:
             msgs = ["can't create some relations, is there missing data?"]
-            for subject_uri, rtype, object_uri in missing_relations:
-                msgs.append("%s %s %s" % (subject_uri, rtype, object_uri))
+            for subject_uri, rtype, object_uri, exc in missing_relations:
+                msgs.append("Could not find %s when trying to insert (%s, %s, %s)"
+                            % (exc, subject_uri, rtype, object_uri))
             map(error, msgs)
             if self.raise_on_error:
                 raise Exception('\n'.join(msgs))
--- a/dataimport/test/unittest_importer.py	Wed Jun 01 15:36:18 2016 +0200
+++ b/dataimport/test/unittest_importer.py	Wed May 04 23:52:36 2016 +0200
@@ -126,6 +126,19 @@
             self.assertEqual(entity.nom, u'Richelieu')
             self.assertEqual(len(entity.connait), 0)
 
+    def test_import_order(self):
+        """Check import of ext entity in both order"""
+        with self.admin_access.repo_cnx() as cnx:
+            importer = self.importer(cnx)
+            richelieu = ExtEntity('Personne', 3, {'nom': set([u'Richelieu']),
+                                                  'enfant': set([4])})
+            athos = ExtEntity('Personne', 4, {'nom': set([u'Athos'])})
+            importer.import_entities([richelieu, athos])
+            cnx.commit()
+            rset = cnx.execute('Any X WHERE X is Personne, X nom "Richelieu"')
+            entity = rset.get_entity(0, 0)
+            self.assertEqual(entity.enfant[0].nom, 'Athos')
+
     def test_update(self):
         """Check update of ext entity"""
         with self.admin_access.repo_cnx() as cnx: