284 """Yield external entities in an order which attempts to satisfy |
284 """Yield external entities in an order which attempts to satisfy |
285 schema constraints (inlined / cardinality) and to optimize the import. |
285 schema constraints (inlined / cardinality) and to optimize the import. |
286 """ |
286 """ |
287 schema = self.schema |
287 schema = self.schema |
288 extid2eid = self.extid2eid |
288 extid2eid = self.extid2eid |
|
289 order_hint = list(self.etypes_order_hint) |
289 for ext_entity in ext_entities: |
290 for ext_entity in ext_entities: |
290 # check data in the transitional representation and prepare it for |
291 # check data in the transitional representation and prepare it for |
291 # later insertion in the database |
292 # later insertion in the database |
292 for subject_uri, rtype, object_uri in ext_entity.prepare(schema): |
293 for subject_uri, rtype, object_uri in ext_entity.prepare(schema): |
293 deferred.setdefault(rtype, set()).add((subject_uri, object_uri)) |
294 deferred.setdefault(rtype, set()).add((subject_uri, object_uri)) |
294 if not ext_entity.is_ready(extid2eid): |
295 if not ext_entity.is_ready(extid2eid): |
295 queue.setdefault(ext_entity.etype, []).append(ext_entity) |
296 queue.setdefault(ext_entity.etype, []).append(ext_entity) |
296 continue |
297 continue |
297 yield ext_entity |
298 yield ext_entity |
|
299 if not queue: |
|
300 continue |
298 # check for some entities in the queue that may now be ready. We'll have to restart |
301 # check for some entities in the queue that may now be ready. We'll have to restart |
299 # search for ready entities until no one is generated |
302 # search for ready entities until no one is generated |
|
303 for etype in queue: |
|
304 if etype not in order_hint: |
|
305 order_hint.append(etype) |
300 new = True |
306 new = True |
301 while new: |
307 while new: |
302 new = False |
308 new = False |
303 for etype in self.etypes_order_hint: |
309 for etype in order_hint: |
304 if etype in queue: |
310 if etype in queue: |
305 new_queue = [] |
311 new_queue = [] |
306 for ext_entity in queue[etype]: |
312 for ext_entity in queue[etype]: |
307 if ext_entity.is_ready(extid2eid): |
313 if ext_entity.is_ready(extid2eid): |
308 yield ext_entity |
314 yield ext_entity |
342 existing = self.existing_relations[rtype] |
348 existing = self.existing_relations[rtype] |
343 for subject_uri, object_uri in relations: |
349 for subject_uri, object_uri in relations: |
344 try: |
350 try: |
345 subject_eid = extid2eid[subject_uri] |
351 subject_eid = extid2eid[subject_uri] |
346 object_eid = extid2eid[object_uri] |
352 object_eid = extid2eid[object_uri] |
347 except KeyError: |
353 except KeyError as exc: |
348 missing_relations.append((subject_uri, rtype, object_uri)) |
354 missing_relations.append((subject_uri, rtype, object_uri, exc)) |
349 continue |
355 continue |
350 if (subject_eid, object_eid) not in existing: |
356 if (subject_eid, object_eid) not in existing: |
351 prepare_insert_relation(subject_eid, rtype, object_eid) |
357 prepare_insert_relation(subject_eid, rtype, object_eid) |
352 existing.add((subject_eid, object_eid)) |
358 existing.add((subject_eid, object_eid)) |
353 if symmetric: |
359 if symmetric: |
365 map(error, msgs) |
371 map(error, msgs) |
366 if self.raise_on_error: |
372 if self.raise_on_error: |
367 raise Exception('\n'.join(msgs)) |
373 raise Exception('\n'.join(msgs)) |
368 if missing_relations: |
374 if missing_relations: |
369 msgs = ["can't create some relations, is there missing data?"] |
375 msgs = ["can't create some relations, is there missing data?"] |
370 for subject_uri, rtype, object_uri in missing_relations: |
376 for subject_uri, rtype, object_uri, exc in missing_relations: |
371 msgs.append("%s %s %s" % (subject_uri, rtype, object_uri)) |
377 msgs.append("Could not find %s when trying to insert (%s, %s, %s)" |
|
378 % (exc, subject_uri, rtype, object_uri)) |
372 map(error, msgs) |
379 map(error, msgs) |
373 if self.raise_on_error: |
380 if self.raise_on_error: |
374 raise Exception('\n'.join(msgs)) |
381 raise Exception('\n'.join(msgs)) |
375 |
382 |
376 |
383 |