317 """Yield external entities in an order which attempts to satisfy |
317 """Yield external entities in an order which attempts to satisfy |
318 schema constraints (inlined / cardinality) and to optimize the import. |
318 schema constraints (inlined / cardinality) and to optimize the import. |
319 """ |
319 """ |
320 schema = self.schema |
320 schema = self.schema |
321 extid2eid = self.extid2eid |
321 extid2eid = self.extid2eid |
|
322 order_hint = list(self.etypes_order_hint) |
322 for ext_entity in ext_entities: |
323 for ext_entity in ext_entities: |
323 # check data in the transitional representation and prepare it for |
324 # check data in the transitional representation and prepare it for |
324 # later insertion in the database |
325 # later insertion in the database |
325 for subject_uri, rtype, object_uri in ext_entity.prepare(schema): |
326 for subject_uri, rtype, object_uri in ext_entity.prepare(schema): |
326 deferred.setdefault(rtype, set()).add((subject_uri, object_uri)) |
327 deferred.setdefault(rtype, set()).add((subject_uri, object_uri)) |
327 if not ext_entity.is_ready(extid2eid): |
328 if not ext_entity.is_ready(extid2eid): |
328 queue.setdefault(ext_entity.etype, []).append(ext_entity) |
329 queue.setdefault(ext_entity.etype, []).append(ext_entity) |
329 continue |
330 continue |
330 yield ext_entity |
331 yield ext_entity |
|
332 if not queue: |
|
333 continue |
331 # check for some entities in the queue that may now be ready. We'll have to restart |
334 # check for some entities in the queue that may now be ready. We'll have to restart |
332 # search for ready entities until no one is generated |
335 # search for ready entities until no one is generated |
|
336 for etype in queue: |
|
337 if etype not in order_hint: |
|
338 order_hint.append(etype) |
333 new = True |
339 new = True |
334 while new: |
340 while new: |
335 new = False |
341 new = False |
336 for etype in self.etypes_order_hint: |
342 for etype in order_hint: |
337 if etype in queue: |
343 if etype in queue: |
338 new_queue = [] |
344 new_queue = [] |
339 for ext_entity in queue[etype]: |
345 for ext_entity in queue[etype]: |
340 if ext_entity.is_ready(extid2eid): |
346 if ext_entity.is_ready(extid2eid): |
341 yield ext_entity |
347 yield ext_entity |
375 existing = self.existing_relations[rtype] |
381 existing = self.existing_relations[rtype] |
376 for subject_uri, object_uri in relations: |
382 for subject_uri, object_uri in relations: |
377 try: |
383 try: |
378 subject_eid = extid2eid[subject_uri] |
384 subject_eid = extid2eid[subject_uri] |
379 object_eid = extid2eid[object_uri] |
385 object_eid = extid2eid[object_uri] |
380 except KeyError: |
386 except KeyError as exc: |
381 missing_relations.append((subject_uri, rtype, object_uri)) |
387 missing_relations.append((subject_uri, rtype, object_uri, exc)) |
382 continue |
388 continue |
383 if (subject_eid, object_eid) not in existing: |
389 if (subject_eid, object_eid) not in existing: |
384 prepare_insert_relation(subject_eid, rtype, object_eid) |
390 prepare_insert_relation(subject_eid, rtype, object_eid) |
385 existing.add((subject_eid, object_eid)) |
391 existing.add((subject_eid, object_eid)) |
386 if symmetric: |
392 if symmetric: |
398 map(error, msgs) |
404 map(error, msgs) |
399 if self.raise_on_error: |
405 if self.raise_on_error: |
400 raise Exception('\n'.join(msgs)) |
406 raise Exception('\n'.join(msgs)) |
401 if missing_relations: |
407 if missing_relations: |
402 msgs = ["can't create some relations, is there missing data?"] |
408 msgs = ["can't create some relations, is there missing data?"] |
403 for subject_uri, rtype, object_uri in missing_relations: |
409 for subject_uri, rtype, object_uri, exc in missing_relations: |
404 msgs.append("%s %s %s" % (subject_uri, rtype, object_uri)) |
410 msgs.append("Could not find %s when trying to insert (%s, %s, %s)" |
|
411 % (exc, subject_uri, rtype, object_uri)) |
405 map(error, msgs) |
412 map(error, msgs) |
406 if self.raise_on_error: |
413 if self.raise_on_error: |
407 raise Exception('\n'.join(msgs)) |
414 raise Exception('\n'.join(msgs)) |
408 |
415 |
409 |
416 |