[massive store] Lazy removal of constraints and metadata indexes
They should not be removed during the store's init, because we may want to query the
database with its indexes between store creation and the call to prepare_insert* (e.g.
to build the extid2eid map).
Along the way:
* rename _drop_metatables_constraints into _drop_metatables_indexes, because that's
what it does
* rework a bit impacted tests
Closes #15538359
--- a/cubicweb/dataimport/massive_store.py Mon Oct 17 14:50:47 2016 +0200
+++ b/cubicweb/dataimport/massive_store.py Tue Sep 27 12:02:07 2016 +0200
@@ -97,11 +97,7 @@
self._data_relations = defaultdict(list)
self._initialized = set()
- if not self.slave_mode:
- # drop constraint and metadata table, they will be recreated when self.finish() is
- # called
- self._drop_all_constraints()
- self._drop_metatables_constraints()
+ self._constraints_dropped = self.slave_mode
def _get_eid_gen(self):
""" Function getting the next eid. This is done by preselecting
@@ -114,6 +110,15 @@
# SQL utilities #########################################################
+ def _drop_constraints(self):
+ """Drop constraints and metadata table indexes, unless already dropped."""
+ if not self._constraints_dropped:
+ # drop constraint and metadata table, they will be recreated when self.finish() is
+ # called
+ self._drop_all_constraints()
+ self._drop_metatables_indexes()
+ self._constraints_dropped = True
+
def _drop_all_constraints(self):
etypes_tables = ('cw_%s' % eschema.type.lower() for eschema in self.schema.entities()
if not eschema.final)
@@ -122,7 +127,7 @@
for tablename in chain(etypes_tables, rtypes_tables, ('entities',)):
self._dbh.drop_constraints(tablename)
- def _drop_metatables_constraints(self):
+ def _drop_metatables_indexes(self):
- """ Drop all the constraints for the meta data"""
+ """Drop all the indexes of the metadata tables."""
for tablename in ('created_by_relation', 'owned_by_relation',
'is_instance_of_relation', 'is_relation',
@@ -142,6 +147,7 @@
"""
if not self.slave_mode and etype not in self._initialized:
self._initialized.add(etype)
+ self._drop_constraints()
self._dbh.drop_indexes('cw_%s' % etype.lower())
self.sql('CREATE TABLE IF NOT EXISTS cwmassive_initialized'
'(retype text, type varchar(128))')
@@ -170,6 +176,7 @@
if not self.slave_mode and rtype not in self._initialized:
assert not self._cnx.vreg.schema.rschema(rtype).inlined
self._initialized.add(rtype)
+ self._drop_constraints()
self._dbh.drop_indexes('%s_relation' % rtype.lower())
self.sql('CREATE TABLE %s_relation_tmp (eid_from integer, eid_to integer)'
% rtype.lower())
--- a/cubicweb/dataimport/test/test_massive_store.py Mon Oct 17 14:50:47 2016 +0200
+++ b/cubicweb/dataimport/test/test_massive_store.py Tue Sep 27 12:02:07 2016 +0200
@@ -35,6 +35,11 @@
stoppgcluster(__file__)
+def all_indexes(cnx):
+ crs = cnx.system_sql('SELECT indexname FROM pg_indexes')
+ return set(r[0] for r in crs.fetchall())
+
+
class MassiveObjectStoreWithCustomMDGenStoreTC(
test_stores.NoHookRQLObjectStoreWithCustomMDGenStoreTC):
configcls = PostgresApptestConfiguration
@@ -118,30 +123,36 @@
'T name TN')[0]
self.assertEqual(cnx.entity_from_eid(eid).cw_etype, etname)
- def test_drop_index(self):
- with self.admin_access.repo_cnx() as cnx:
- store = MassiveObjectStore(cnx)
- cnx.commit()
+ def test_index_not_dropped_by_init(self):
with self.admin_access.repo_cnx() as cnx:
- crs = cnx.system_sql('SELECT indexname FROM pg_indexes')
- indexes = [r[0] for r in crs.fetchall()]
- self.assertNotIn('entities_pkey', indexes)
- self.assertNotIn('owned_by_relation_pkey', indexes)
- self.assertNotIn('owned_by_relation_to_idx', indexes)
+ store = MassiveObjectStore(cnx) # noqa
+ cnx.commit()
+ indexes = all_indexes(cnx)
+ self.assertIn('entities_pkey', indexes)
+ self.assertIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'),
+ indexes)
+ self.assertIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'),
+ indexes)
def test_drop_index_recreation(self):
with self.admin_access.repo_cnx() as cnx:
store = MassiveObjectStore(cnx)
+
+ store._drop_constraints()
+ indexes = all_indexes(cnx)
+ self.assertNotIn('entities_pkey', indexes)
+ self.assertNotIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'),
+ indexes)
+ self.assertNotIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'),
+ indexes)
+
store.finish()
- cnx.commit()
- with self.admin_access.repo_cnx() as cnx:
- crs = cnx.system_sql('SELECT indexname FROM pg_indexes')
- indexes = [r[0] for r in crs.fetchall()]
- self.assertIn('entities_pkey', indexes)
- self.assertIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'),
- indexes)
- self.assertIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'),
- indexes)
+ indexes = all_indexes(cnx)
+ self.assertIn('entities_pkey', indexes)
+ self.assertIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'),
+ indexes)
+ self.assertIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'),
+ indexes)
def test_eids_seq_range(self):
with self.admin_access.repo_cnx() as cnx:
@@ -212,18 +223,6 @@
store.flush()
self.assertEqual(next(counter), 1)
- def test_slave_mode_indexes(self):
- with self.admin_access.repo_cnx() as cnx:
- slave_store = MassiveObjectStore(cnx, slave_mode=True)
- with self.admin_access.repo_cnx() as cnx:
- crs = cnx.system_sql('SELECT indexname FROM pg_indexes')
- indexes = [r[0] for r in crs.fetchall()]
- self.assertIn('entities_pkey', indexes)
- self.assertIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'),
- indexes)
- self.assertIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'),
- indexes)
-
def test_slave_mode_exception(self):
with self.admin_access.repo_cnx() as cnx:
slave_store = MassiveObjectStore(cnx, slave_mode=True)
@@ -249,8 +248,7 @@
store.flush()
# Check index
- crs = cnx.system_sql('SELECT indexname FROM pg_indexes')
- indexes = [r[0] for r in crs.fetchall()]
+ indexes = all_indexes(cnx)
self.assertNotIn('entities_pkey', indexes)
self.assertNotIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'),
indexes)
@@ -261,8 +259,7 @@
store.finish()
# Check index again
- crs = cnx.system_sql('SELECT indexname FROM pg_indexes')
- indexes = [r[0] for r in crs.fetchall()]
+ indexes = all_indexes(cnx)
self.assertIn('entities_pkey', indexes)
self.assertIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'),
indexes)