[massive store] Rework constraint/index handling
The basic idea is to keep the primary key constraint on entities.eid since it's
heavily used in metadata insertions. The other option would have been to drop /
recreate it, but that's very costly on a big database, and the index is used
for insertion into the entities table itself, so it's not worth dropping it at
first glance.
Also, keeping it avoids systematically dropping all the constraints which
depend on it. We may thus now drop constraints lazily, only upon insertion of
some etype/rtype, and only for the related table.
Related to #15538359
--- a/cubicweb/dataimport/massive_store.py Tue Sep 27 12:02:07 2016 +0200
+++ b/cubicweb/dataimport/massive_store.py Wed Sep 28 08:57:48 2016 +0200
@@ -27,8 +27,6 @@
from yams.constraints import SizeConstraint
-from cubicweb.schema import PURE_VIRTUAL_RTYPES
-from cubicweb.server.schema2sql import rschema_has_table
from cubicweb.dataimport import stores, pgstore
@@ -110,29 +108,24 @@
# SQL utilities #########################################################
- def _drop_constraints(self):
- """Drop """
+ def _drop_metadata_constraints_if_necessary(self):
+ """Drop constraints and indexes for the metadata tables if necessary."""
if not self._constraints_dropped:
- # drop constraint and metadata table, they will be recreated when self.finish() is
- # called
- self._drop_all_constraints()
- self._drop_metatables_indexes()
+ self._drop_metadata_constraints()
self._constraints_dropped = True
- def _drop_all_constraints(self):
- etypes_tables = ('cw_%s' % eschema.type.lower() for eschema in self.schema.entities()
- if not eschema.final)
- rtypes_tables = ('%s_relation' % rschema.type.lower() for rschema in self.schema.relations()
- if rschema_has_table(rschema, skip_relations=PURE_VIRTUAL_RTYPES))
- for tablename in chain(etypes_tables, rtypes_tables, ('entities',)):
- self._dbh.drop_constraints(tablename)
+ def _drop_metadata_constraints(self):
+ """Drop constraints and indexes for the metadata tables.
- def _drop_metatables_indexes(self):
- """ Drop all the constraints for the meta data"""
+ They will be recreated by the `finish` method.
+ """
for tablename in ('created_by_relation', 'owned_by_relation',
- 'is_instance_of_relation', 'is_relation',
- 'entities'):
+ 'is_instance_of_relation', 'is_relation'):
+ self._dbh.drop_constraints(tablename)
self._dbh.drop_indexes(tablename)
+ # don't drop constraints for the entities table, the only one is the primary key's index on
+ # eid and we want to keep it
+ self._dbh.drop_indexes('entities')
def restart_eid_sequence(self, start_eid):
self.sql(self._cnx.repo.system_source.dbhelper.sql_restart_numrange(
@@ -147,8 +140,10 @@
"""
if not self.slave_mode and etype not in self._initialized:
self._initialized.add(etype)
- self._drop_constraints()
- self._dbh.drop_indexes('cw_%s' % etype.lower())
+ self._drop_metadata_constraints_if_necessary()
+ tablename = 'cw_%s' % etype.lower()
+ self._dbh.drop_constraints(tablename)
+ self._dbh.drop_indexes(tablename)
self.sql('CREATE TABLE IF NOT EXISTS cwmassive_initialized'
'(retype text, type varchar(128))')
self.sql("INSERT INTO cwmassive_initialized VALUES (%(e)s, 'etype')", {'e': etype})
@@ -176,10 +171,12 @@
if not self.slave_mode and rtype not in self._initialized:
assert not self._cnx.vreg.schema.rschema(rtype).inlined
self._initialized.add(rtype)
- self._drop_constraints()
- self._dbh.drop_indexes('%s_relation' % rtype.lower())
- self.sql('CREATE TABLE %s_relation_tmp (eid_from integer, eid_to integer)'
- % rtype.lower())
+ self._drop_metadata_constraints_if_necessary()
+ tablename = '%s_relation' % rtype.lower()
+ self._dbh.drop_constraints(tablename)
+ self._dbh.drop_indexes(tablename)
+ self.sql('CREATE TABLE %s_tmp (eid_from integer, eid_to integer)'
+ % tablename)
self.sql('CREATE TABLE IF NOT EXISTS cwmassive_initialized'
'(retype text, type varchar(128))')
self.sql("INSERT INTO cwmassive_initialized VALUES (%(e)s, 'rtype')", {'e': rtype})
--- a/cubicweb/dataimport/test/test_massive_store.py Tue Sep 27 12:02:07 2016 +0200
+++ b/cubicweb/dataimport/test/test_massive_store.py Wed Sep 28 08:57:48 2016 +0200
@@ -140,7 +140,7 @@
store._drop_constraints()
indexes = all_indexes(cnx)
- self.assertNotIn('entities_pkey', indexes)
+ self.assertIn('entities_pkey', indexes)
self.assertNotIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'),
indexes)
self.assertNotIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'),
@@ -249,7 +249,7 @@
# Check index
indexes = all_indexes(cnx)
- self.assertNotIn('entities_pkey', indexes)
+ self.assertIn('entities_pkey', indexes)
self.assertNotIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'),
indexes)
self.assertNotIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'),