# HG changeset patch # User Adrien Di Mascio # Date 1476364353 -7200 # Node ID 04607da552ac0b95cfc9e80740192ac2560c8dc3 # Parent 71df2811b4225e1c4d6f5bd9b8129a5b1b15535e [massive store] isolate SQL statements that one may want to customize i.e. provide public methods for meta data insertions diff -r 71df2811b422 -r 04607da552ac cubicweb/dataimport/massive_store.py --- a/cubicweb/dataimport/massive_store.py Thu Oct 06 12:12:04 2016 +0200 +++ b/cubicweb/dataimport/massive_store.py Thu Oct 13 15:12:33 2016 +0200 @@ -234,12 +234,7 @@ self._dbh.drop_indexes(tablename) for uuid in uuids: tmp_tablename = '%s_%s' % (tablename, uuid) - # XXX no index on the original relation table, EXISTS subquery may be sloooow - self.sql('INSERT INTO %(table)s(eid_from, eid_to) SELECT DISTINCT ' - 'T.eid_from, T.eid_to FROM %(tmp_table)s AS T ' - 'WHERE NOT EXISTS (SELECT 1 FROM %(table)s AS TT WHERE ' - 'TT.eid_from=T.eid_from AND TT.eid_to=T.eid_to);' - % {'table': tablename, 'tmp_table': tmp_tablename}) + self.fill_relation_table(tablename, tmp_tablename) self._tmp_data_cleanup(tmp_tablename, rtype, uuid) # restore all deleted indexes and constraints self._dbh.restore_indexes_and_constraints() @@ -252,20 +247,31 @@ """ # insert standard metadata relations for rtype, eid in self.metagen.base_etype_rels(etype).items(): - self._insert_meta_relation(tmp_tablename, rtype, eid) + self.fill_meta_relation_table(tmp_tablename, rtype, eid) # insert cw_source, is and is_instance_of relations (normally handled by the system source) - self._insert_meta_relation(tmp_tablename, 'cw_source', self.metagen.source.eid) + self.fill_meta_relation_table(tmp_tablename, 'cw_source', self.metagen.source.eid) eschema = self.schema[etype] - self._insert_meta_relation(tmp_tablename, 'is', eschema.eid) + self.fill_meta_relation_table(tmp_tablename, 'is', eschema.eid) for parent_eschema in chain(eschema.ancestors(), [eschema]): - self._insert_meta_relation(tmp_tablename, 'is_instance_of', parent_eschema.eid) + self.fill_meta_relation_table(tmp_tablename, 'is_instance_of', parent_eschema.eid) + self.fill_entities_table(etype, tmp_tablename) + + def fill_entities_table(self, etype, tmp_tablename): # finally insert records into the entities table self.sql("INSERT INTO entities(eid, type) " "SELECT cw_eid, '%s' FROM %s " "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)" % (etype, tmp_tablename)) - def _insert_meta_relation(self, tmp_tablename, rtype, eid_to): + def fill_relation_table(self, tablename, tmp_tablename): + # XXX no index on the original relation table, EXISTS subquery may be sloooow + self.sql('INSERT INTO %(table)s(eid_from, eid_to) SELECT DISTINCT ' + 'T.eid_from, T.eid_to FROM %(tmp_table)s AS T ' + 'WHERE NOT EXISTS (SELECT 1 FROM %(table)s AS TT WHERE ' + 'TT.eid_from=T.eid_from AND TT.eid_to=T.eid_to);' + % {'table': tablename, 'tmp_table': tmp_tablename}) + + def fill_meta_relation_table(self, tmp_tablename, rtype, eid_to): self.sql("INSERT INTO %s_relation(eid_from, eid_to) SELECT cw_eid, %s FROM %s " "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)" % (rtype, eid_to, tmp_tablename))