# HG changeset patch # User Sylvain Thénault # Date 1475045868 -7200 # Node ID 9847a097266e263c3e77f2b8fdcf35751e44f599 # Parent 5b535fe2f36432ff6d5e7b3e6e2780687d1d2a07 [massive store] Rework constraint/index handling The basic idea is to keep the primary key constraint on entities.eid since it's heavily used in metadata insertions. The other option would have been to drop / recreate it, but that is very costly on a big database, and the index is used for insertion into the entities table itself, so at first glance it's not worth dropping. Also, keeping it avoids having to systematically drop all constraints which depend on it. We may thus now drop constraints lazily, only on insertion of some etype/rtype for the related table. Related to #15538359 diff -r 5b535fe2f364 -r 9847a097266e cubicweb/dataimport/massive_store.py --- a/cubicweb/dataimport/massive_store.py Tue Sep 27 12:02:07 2016 +0200 +++ b/cubicweb/dataimport/massive_store.py Wed Sep 28 08:57:48 2016 +0200 @@ -27,8 +27,6 @@ from yams.constraints import SizeConstraint -from cubicweb.schema import PURE_VIRTUAL_RTYPES -from cubicweb.server.schema2sql import rschema_has_table from cubicweb.dataimport import stores, pgstore @@ -110,29 +108,24 @@ # SQL utilities ######################################################### - def _drop_constraints(self): - """Drop """ + def _drop_metadata_constraints_if_necessary(self): + """Drop constraints and indexes for the metadata tables if necessary.""" if not self._constraints_dropped: - # drop constraint and metadata table, they will be recreated when self.finish() is - # called - self._drop_all_constraints() - self._drop_metatables_indexes() + self._drop_metadata_constraints() self._constraints_dropped = True - def _drop_all_constraints(self): - etypes_tables = ('cw_%s' % eschema.type.lower() for eschema in self.schema.entities() - if not eschema.final) - rtypes_tables = ('%s_relation' % rschema.type.lower() for rschema in self.schema.relations() - if rschema_has_table(rschema, 
skip_relations=PURE_VIRTUAL_RTYPES)) - for tablename in chain(etypes_tables, rtypes_tables, ('entities',)): - self._dbh.drop_constraints(tablename) + def _drop_metadata_constraints(self): + """Drop constraints and indexes for the metadata tables. - def _drop_metatables_indexes(self): - """ Drop all the constraints for the meta data""" + They will be recreated by the `finish` method. + """ for tablename in ('created_by_relation', 'owned_by_relation', - 'is_instance_of_relation', 'is_relation', - 'entities'): + 'is_instance_of_relation', 'is_relation'): + self._dbh.drop_constraints(tablename) self._dbh.drop_indexes(tablename) + # don't drop constraints for the entities table, the only one is the primary key's index on + # eid and we want to keep it + self._dbh.drop_indexes('entities') def restart_eid_sequence(self, start_eid): self.sql(self._cnx.repo.system_source.dbhelper.sql_restart_numrange( @@ -147,8 +140,10 @@ """ if not self.slave_mode and etype not in self._initialized: self._initialized.add(etype) - self._drop_constraints() - self._dbh.drop_indexes('cw_%s' % etype.lower()) + self._drop_metadata_constraints_if_necessary() + tablename = 'cw_%s' % etype.lower() + self._dbh.drop_constraints(tablename) + self._dbh.drop_indexes(tablename) self.sql('CREATE TABLE IF NOT EXISTS cwmassive_initialized' '(retype text, type varchar(128))') self.sql("INSERT INTO cwmassive_initialized VALUES (%(e)s, 'etype')", {'e': etype}) @@ -176,10 +171,12 @@ if not self.slave_mode and rtype not in self._initialized: assert not self._cnx.vreg.schema.rschema(rtype).inlined self._initialized.add(rtype) - self._drop_constraints() - self._dbh.drop_indexes('%s_relation' % rtype.lower()) - self.sql('CREATE TABLE %s_relation_tmp (eid_from integer, eid_to integer)' - % rtype.lower()) + self._drop_metadata_constraints_if_necessary() + tablename = '%s_relation' % rtype.lower() + self._dbh.drop_constraints(tablename) + self._dbh.drop_indexes(tablename) + self.sql('CREATE TABLE %s_tmp (eid_from 
integer, eid_to integer)' + % tablename) self.sql('CREATE TABLE IF NOT EXISTS cwmassive_initialized' '(retype text, type varchar(128))') self.sql("INSERT INTO cwmassive_initialized VALUES (%(e)s, 'rtype')", {'e': rtype}) diff -r 5b535fe2f364 -r 9847a097266e cubicweb/dataimport/test/test_massive_store.py --- a/cubicweb/dataimport/test/test_massive_store.py Tue Sep 27 12:02:07 2016 +0200 +++ b/cubicweb/dataimport/test/test_massive_store.py Wed Sep 28 08:57:48 2016 +0200 @@ -140,7 +140,7 @@ store._drop_constraints() indexes = all_indexes(cnx) - self.assertNotIn('entities_pkey', indexes) + self.assertIn('entities_pkey', indexes) self.assertNotIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'), indexes) self.assertNotIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'), @@ -249,7 +249,7 @@ # Check index indexes = all_indexes(cnx) - self.assertNotIn('entities_pkey', indexes) + self.assertIn('entities_pkey', indexes) self.assertNotIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'), indexes) self.assertNotIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'),