# HG changeset patch # User Sylvain Thénault # Date 1474970527 -7200 # Node ID 5b535fe2f36432ff6d5e7b3e6e2780687d1d2a07 # Parent b49684ddd54376f3731c3da8103f62528408b1b9 [massive store] Lazy removal of constraints and metadata indexes They should not be removed during store's init, because we may want to query the database with its indexes between store creation and the call to prepare_insert* (e.g. to build the extid2eid map). Along the way: * rename _drop_metatables_constraints to _drop_metatables_indexes, because that's what it does * slightly rework the impacted tests Closes #15538359 diff -r b49684ddd543 -r 5b535fe2f364 cubicweb/dataimport/massive_store.py --- a/cubicweb/dataimport/massive_store.py Mon Oct 17 14:50:47 2016 +0200 +++ b/cubicweb/dataimport/massive_store.py Tue Sep 27 12:02:07 2016 +0200 @@ -97,11 +97,7 @@ self._data_relations = defaultdict(list) self._initialized = set() - if not self.slave_mode: - # drop constraint and metadata table, they will be recreated when self.finish() is - # called - self._drop_all_constraints() - self._drop_metatables_constraints() + self._constraints_dropped = self.slave_mode def _get_eid_gen(self): """ Function getting the next eid. 
This is done by preselecting @@ -114,6 +110,15 @@ # SQL utilities ######################################################### + def _drop_constraints(self): + """Drop constraints and metadata indexes, unless this was already done.""" + if not self._constraints_dropped: + # drop constraints and metadata indexes, they will be recreated when self.finish() is + # called + self._drop_all_constraints() + self._drop_metatables_indexes() + self._constraints_dropped = True + def _drop_all_constraints(self): etypes_tables = ('cw_%s' % eschema.type.lower() for eschema in self.schema.entities() if not eschema.final) @@ -122,7 +127,7 @@ for tablename in chain(etypes_tables, rtypes_tables, ('entities',)): self._dbh.drop_constraints(tablename) - def _drop_metatables_constraints(self): + def _drop_metatables_indexes(self): """ Drop all the constraints for the meta data""" for tablename in ('created_by_relation', 'owned_by_relation', 'is_instance_of_relation', 'is_relation', @@ -142,6 +147,7 @@ """ if not self.slave_mode and etype not in self._initialized: self._initialized.add(etype) + self._drop_constraints() self._dbh.drop_indexes('cw_%s' % etype.lower()) self.sql('CREATE TABLE IF NOT EXISTS cwmassive_initialized' '(retype text, type varchar(128))') @@ -170,6 +176,7 @@ if not self.slave_mode and rtype not in self._initialized: assert not self._cnx.vreg.schema.rschema(rtype).inlined self._initialized.add(rtype) + self._drop_constraints() self._dbh.drop_indexes('%s_relation' % rtype.lower()) self.sql('CREATE TABLE %s_relation_tmp (eid_from integer, eid_to integer)' % rtype.lower()) diff -r b49684ddd543 -r 5b535fe2f364 cubicweb/dataimport/test/test_massive_store.py --- a/cubicweb/dataimport/test/test_massive_store.py Mon Oct 17 14:50:47 2016 +0200 +++ b/cubicweb/dataimport/test/test_massive_store.py Tue Sep 27 12:02:07 2016 +0200 @@ -35,6 +35,11 @@ stoppgcluster(__file__) +def all_indexes(cnx): + crs = cnx.system_sql('SELECT indexname FROM pg_indexes') + return set(r[0] for r in crs.fetchall()) + + class MassiveObjectStoreWithCustomMDGenStoreTC( 
test_stores.NoHookRQLObjectStoreWithCustomMDGenStoreTC): configcls = PostgresApptestConfiguration @@ -118,30 +123,36 @@ 'T name TN')[0] self.assertEqual(cnx.entity_from_eid(eid).cw_etype, etname) - def test_drop_index(self): - with self.admin_access.repo_cnx() as cnx: - store = MassiveObjectStore(cnx) - cnx.commit() + def test_index_not_dropped_by_init(self): with self.admin_access.repo_cnx() as cnx: - crs = cnx.system_sql('SELECT indexname FROM pg_indexes') - indexes = [r[0] for r in crs.fetchall()] - self.assertNotIn('entities_pkey', indexes) - self.assertNotIn('owned_by_relation_pkey', indexes) - self.assertNotIn('owned_by_relation_to_idx', indexes) + store = MassiveObjectStore(cnx) # noqa + cnx.commit() + indexes = all_indexes(cnx) + self.assertIn('entities_pkey', indexes) + self.assertIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'), + indexes) + self.assertIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'), + indexes) def test_drop_index_recreation(self): with self.admin_access.repo_cnx() as cnx: store = MassiveObjectStore(cnx) + + store._drop_constraints() + indexes = all_indexes(cnx) + self.assertNotIn('entities_pkey', indexes) + self.assertNotIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'), + indexes) + self.assertNotIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'), + indexes) + store.finish() - cnx.commit() - with self.admin_access.repo_cnx() as cnx: - crs = cnx.system_sql('SELECT indexname FROM pg_indexes') - indexes = [r[0] for r in crs.fetchall()] - self.assertIn('entities_pkey', indexes) - self.assertIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'), - indexes) - self.assertIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'), - indexes) + indexes = all_indexes(cnx) + self.assertIn('entities_pkey', indexes) + self.assertIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'), + indexes) + 
self.assertIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'), + indexes) def test_eids_seq_range(self): with self.admin_access.repo_cnx() as cnx: @@ -212,18 +223,6 @@ store.flush() self.assertEqual(next(counter), 1) - def test_slave_mode_indexes(self): - with self.admin_access.repo_cnx() as cnx: - slave_store = MassiveObjectStore(cnx, slave_mode=True) - with self.admin_access.repo_cnx() as cnx: - crs = cnx.system_sql('SELECT indexname FROM pg_indexes') - indexes = [r[0] for r in crs.fetchall()] - self.assertIn('entities_pkey', indexes) - self.assertIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'), - indexes) - self.assertIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'), - indexes) - def test_slave_mode_exception(self): with self.admin_access.repo_cnx() as cnx: slave_store = MassiveObjectStore(cnx, slave_mode=True) @@ -249,8 +248,7 @@ store.flush() # Check index - crs = cnx.system_sql('SELECT indexname FROM pg_indexes') - indexes = [r[0] for r in crs.fetchall()] + indexes = all_indexes(cnx) self.assertNotIn('entities_pkey', indexes) self.assertNotIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'), indexes) @@ -261,8 +259,7 @@ store.finish() # Check index again - crs = cnx.system_sql('SELECT indexname FROM pg_indexes') - indexes = [r[0] for r in crs.fetchall()] + indexes = all_indexes(cnx) self.assertIn('entities_pkey', indexes) self.assertIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'), indexes)