cubicweb/dataimport/massive_store.py
changeset 11790 04607da552ac
parent 11789 71df2811b422
child 11791 20555214576b
equal deleted inserted replaced
11789:71df2811b422 11790:04607da552ac
   232                 tablename = '%s_relation' % rtype.lower()
   232                 tablename = '%s_relation' % rtype.lower()
   233                 self._dbh.drop_constraints(tablename)
   233                 self._dbh.drop_constraints(tablename)
   234                 self._dbh.drop_indexes(tablename)
   234                 self._dbh.drop_indexes(tablename)
   235                 for uuid in uuids:
   235                 for uuid in uuids:
   236                     tmp_tablename = '%s_%s' % (tablename, uuid)
   236                     tmp_tablename = '%s_%s' % (tablename, uuid)
   237                     # XXX no index on the original relation table, EXISTS subquery may be sloooow
   237                     self.fill_relation_table(tablename, tmp_tablename)
   238                     self.sql('INSERT INTO %(table)s(eid_from, eid_to) SELECT DISTINCT '
       
   239                              'T.eid_from, T.eid_to FROM %(tmp_table)s AS T '
       
   240                              'WHERE NOT EXISTS (SELECT 1 FROM %(table)s AS TT WHERE '
       
   241                              'TT.eid_from=T.eid_from AND TT.eid_to=T.eid_to);'
       
   242                              % {'table': tablename, 'tmp_table': tmp_tablename})
       
   243                     self._tmp_data_cleanup(tmp_tablename, rtype, uuid)
   238                     self._tmp_data_cleanup(tmp_tablename, rtype, uuid)
   244         # restore all deleted indexes and constraints
   239         # restore all deleted indexes and constraints
   245         self._dbh.restore_indexes_and_constraints()
   240         self._dbh.restore_indexes_and_constraints()
   246         # delete the meta data table
   241         # delete the meta data table
   247         self.sql('DROP TABLE IF EXISTS cwmassive_initialized')
   242         self.sql('DROP TABLE IF EXISTS cwmassive_initialized')
   250     def _insert_etype_metadata(self, etype, tmp_tablename):
   245     def _insert_etype_metadata(self, etype, tmp_tablename):
   251         """Massive insertion of meta data for `etype`, with new entities in `tmp_tablename`.
   246         """Massive insertion of meta data for `etype`, with new entities in `tmp_tablename`.
   252         """
   247         """
   253         # insert standard metadata relations
   248         # insert standard metadata relations
   254         for rtype, eid in self.metagen.base_etype_rels(etype).items():
   249         for rtype, eid in self.metagen.base_etype_rels(etype).items():
   255             self._insert_meta_relation(tmp_tablename, rtype, eid)
   250             self.fill_meta_relation_table(tmp_tablename, rtype, eid)
   256         # insert cw_source, is and is_instance_of relations (normally handled by the system source)
   251         # insert cw_source, is and is_instance_of relations (normally handled by the system source)
   257         self._insert_meta_relation(tmp_tablename, 'cw_source', self.metagen.source.eid)
   252         self.fill_meta_relation_table(tmp_tablename, 'cw_source', self.metagen.source.eid)
   258         eschema = self.schema[etype]
   253         eschema = self.schema[etype]
   259         self._insert_meta_relation(tmp_tablename, 'is', eschema.eid)
   254         self.fill_meta_relation_table(tmp_tablename, 'is', eschema.eid)
   260         for parent_eschema in chain(eschema.ancestors(), [eschema]):
   255         for parent_eschema in chain(eschema.ancestors(), [eschema]):
   261             self._insert_meta_relation(tmp_tablename, 'is_instance_of', parent_eschema.eid)
   256             self.fill_meta_relation_table(tmp_tablename, 'is_instance_of', parent_eschema.eid)
       
   257         self.fill_entities_table(etype, tmp_tablename)
       
   258 
       
   259     def fill_entities_table(self, etype, tmp_tablename):
   262         # finally insert records into the entities table
   260         # finally insert records into the entities table
   263         self.sql("INSERT INTO entities(eid, type) "
   261         self.sql("INSERT INTO entities(eid, type) "
   264                  "SELECT cw_eid, '%s' FROM %s "
   262                  "SELECT cw_eid, '%s' FROM %s "
   265                  "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
   263                  "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
   266                  % (etype, tmp_tablename))
   264                  % (etype, tmp_tablename))
   267 
   265 
   268     def _insert_meta_relation(self, tmp_tablename, rtype, eid_to):
   266     def fill_relation_table(self, tablename, tmp_tablename):
       
   267         # XXX no index on the original relation table, EXISTS subquery may be sloooow
       
   268         self.sql('INSERT INTO %(table)s(eid_from, eid_to) SELECT DISTINCT '
       
   269                  'T.eid_from, T.eid_to FROM %(tmp_table)s AS T '
       
   270                  'WHERE NOT EXISTS (SELECT 1 FROM %(table)s AS TT WHERE '
       
   271                  'TT.eid_from=T.eid_from AND TT.eid_to=T.eid_to);'
       
   272                  % {'table': tablename, 'tmp_table': tmp_tablename})
       
   273 
       
   274     def fill_meta_relation_table(self, tmp_tablename, rtype, eid_to):
   269         self.sql("INSERT INTO %s_relation(eid_from, eid_to) SELECT cw_eid, %s FROM %s "
   275         self.sql("INSERT INTO %s_relation(eid_from, eid_to) SELECT cw_eid, %s FROM %s "
   270                  "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
   276                  "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
   271                  % (rtype, eid_to, tmp_tablename))
   277                  % (rtype, eid_to, tmp_tablename))
   272 
   278 
   273     def _tmp_data_cleanup(self, tmp_tablename, ertype, uuid):
   279     def _tmp_data_cleanup(self, tmp_tablename, ertype, uuid):