dataimport/importer.py
changeset 11275 814f54d6183b
parent 10939 b30c2f49da57
equal deleted inserted replaced
11274:d0f6fe008ec4 11275:814f54d6183b
   284         """Yield external entities in an order which attempts to satisfy
   284         """Yield external entities in an order which attempts to satisfy
   285         schema constraints (inlined / cardinality) and to optimize the import.
   285         schema constraints (inlined / cardinality) and to optimize the import.
   286         """
   286         """
   287         schema = self.schema
   287         schema = self.schema
   288         extid2eid = self.extid2eid
   288         extid2eid = self.extid2eid
       
   289         order_hint = list(self.etypes_order_hint)
   289         for ext_entity in ext_entities:
   290         for ext_entity in ext_entities:
   290             # check data in the transitional representation and prepare it for
   291             # check data in the transitional representation and prepare it for
   291             # later insertion in the database
   292             # later insertion in the database
   292             for subject_uri, rtype, object_uri in ext_entity.prepare(schema):
   293             for subject_uri, rtype, object_uri in ext_entity.prepare(schema):
   293                 deferred.setdefault(rtype, set()).add((subject_uri, object_uri))
   294                 deferred.setdefault(rtype, set()).add((subject_uri, object_uri))
   294             if not ext_entity.is_ready(extid2eid):
   295             if not ext_entity.is_ready(extid2eid):
   295                 queue.setdefault(ext_entity.etype, []).append(ext_entity)
   296                 queue.setdefault(ext_entity.etype, []).append(ext_entity)
   296                 continue
   297                 continue
   297             yield ext_entity
   298             yield ext_entity
       
   299             if not queue:
       
   300                 continue
   298             # check for some entities in the queue that may now be ready. We'll have to restart
   301             # check for some entities in the queue that may now be ready. We'll have to restart
   299             # the search for ready entities until no more are yielded
   302             # the search for ready entities until no more are yielded
       
   303             for etype in queue:
       
   304                 if etype not in order_hint:
       
   305                     order_hint.append(etype)
   300             new = True
   306             new = True
   301             while new:
   307             while new:
   302                 new = False
   308                 new = False
   303                 for etype in self.etypes_order_hint:
   309                 for etype in order_hint:
   304                     if etype in queue:
   310                     if etype in queue:
   305                         new_queue = []
   311                         new_queue = []
   306                         for ext_entity in queue[etype]:
   312                         for ext_entity in queue[etype]:
   307                             if ext_entity.is_ready(extid2eid):
   313                             if ext_entity.is_ready(extid2eid):
   308                                 yield ext_entity
   314                                 yield ext_entity
   342             existing = self.existing_relations[rtype]
   348             existing = self.existing_relations[rtype]
   343             for subject_uri, object_uri in relations:
   349             for subject_uri, object_uri in relations:
   344                 try:
   350                 try:
   345                     subject_eid = extid2eid[subject_uri]
   351                     subject_eid = extid2eid[subject_uri]
   346                     object_eid = extid2eid[object_uri]
   352                     object_eid = extid2eid[object_uri]
   347                 except KeyError:
   353                 except KeyError as exc:
   348                     missing_relations.append((subject_uri, rtype, object_uri))
   354                     missing_relations.append((subject_uri, rtype, object_uri, exc))
   349                     continue
   355                     continue
   350                 if (subject_eid, object_eid) not in existing:
   356                 if (subject_eid, object_eid) not in existing:
   351                     prepare_insert_relation(subject_eid, rtype, object_eid)
   357                     prepare_insert_relation(subject_eid, rtype, object_eid)
   352                     existing.add((subject_eid, object_eid))
   358                     existing.add((subject_eid, object_eid))
   353                     if symmetric:
   359                     if symmetric:
   365             map(error, msgs)
   371             map(error, msgs)
   366             if self.raise_on_error:
   372             if self.raise_on_error:
   367                 raise Exception('\n'.join(msgs))
   373                 raise Exception('\n'.join(msgs))
   368         if missing_relations:
   374         if missing_relations:
   369             msgs = ["can't create some relations, is there missing data?"]
   375             msgs = ["can't create some relations, is there missing data?"]
   370             for subject_uri, rtype, object_uri in missing_relations:
   376             for subject_uri, rtype, object_uri, exc in missing_relations:
   371                 msgs.append("%s %s %s" % (subject_uri, rtype, object_uri))
   377                 msgs.append("Could not find %s when trying to insert (%s, %s, %s)"
       
   378                             % (exc, subject_uri, rtype, object_uri))
   372             map(error, msgs)
   379             map(error, msgs)
   373             if self.raise_on_error:
   380             if self.raise_on_error:
   374                 raise Exception('\n'.join(msgs))
   381                 raise Exception('\n'.join(msgs))
   375 
   382 
   376 
   383