[massive store] Lazy removal of constraints and metadata indexes
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Tue, 27 Sep 2016 12:02:07 +0200
changeset 11777 5b535fe2f364
parent 11776 b49684ddd543
child 11778 9847a097266e
[massive store] Lazy removal of constraints and metadata indexes They should not be removed during store's init, because we may want to query the database with its index between store creation and call to prepare_insert* (e.g. to build the extid2eid map). Along the way: * rename drop_metadata_constraints into drop_metadata_indexes, because that's what it does * rework a bit impacted tests Closes #15538359
cubicweb/dataimport/massive_store.py
cubicweb/dataimport/test/test_massive_store.py
--- a/cubicweb/dataimport/massive_store.py	Mon Oct 17 14:50:47 2016 +0200
+++ b/cubicweb/dataimport/massive_store.py	Tue Sep 27 12:02:07 2016 +0200
@@ -97,11 +97,7 @@
         self._data_relations = defaultdict(list)
         self._initialized = set()
 
-        if not self.slave_mode:
-            # drop constraint and metadata table, they will be recreated when self.finish() is
-            # called
-            self._drop_all_constraints()
-            self._drop_metatables_constraints()
+        self._constraints_dropped = self.slave_mode
 
     def _get_eid_gen(self):
         """ Function getting the next eid. This is done by preselecting
@@ -114,6 +110,15 @@
 
     # SQL utilities #########################################################
 
+    def _drop_constraints(self):
+        """Drop constraints and metadata indexes if not already dropped."""
+        if not self._constraints_dropped:
+            # drop constraint and metadata table, they will be recreated when self.finish() is
+            # called
+            self._drop_all_constraints()
+            self._drop_metatables_indexes()
+            self._constraints_dropped = True
+
     def _drop_all_constraints(self):
         etypes_tables = ('cw_%s' % eschema.type.lower() for eschema in self.schema.entities()
                          if not eschema.final)
@@ -122,7 +127,7 @@
         for tablename in chain(etypes_tables, rtypes_tables, ('entities',)):
             self._dbh.drop_constraints(tablename)
 
-    def _drop_metatables_constraints(self):
+    def _drop_metatables_indexes(self):
         """ Drop all the constraints for the meta data"""
         for tablename in ('created_by_relation', 'owned_by_relation',
                           'is_instance_of_relation', 'is_relation',
@@ -142,6 +147,7 @@
         """
         if not self.slave_mode and etype not in self._initialized:
             self._initialized.add(etype)
+            self._drop_constraints()
             self._dbh.drop_indexes('cw_%s' % etype.lower())
             self.sql('CREATE TABLE IF NOT EXISTS cwmassive_initialized'
                      '(retype text, type varchar(128))')
@@ -170,6 +176,7 @@
         if not self.slave_mode and rtype not in self._initialized:
             assert not self._cnx.vreg.schema.rschema(rtype).inlined
             self._initialized.add(rtype)
+            self._drop_constraints()
             self._dbh.drop_indexes('%s_relation' % rtype.lower())
             self.sql('CREATE TABLE %s_relation_tmp (eid_from integer, eid_to integer)'
                      % rtype.lower())
--- a/cubicweb/dataimport/test/test_massive_store.py	Mon Oct 17 14:50:47 2016 +0200
+++ b/cubicweb/dataimport/test/test_massive_store.py	Tue Sep 27 12:02:07 2016 +0200
@@ -35,6 +35,11 @@
     stoppgcluster(__file__)
 
 
+def all_indexes(cnx):
+    crs = cnx.system_sql('SELECT indexname FROM pg_indexes')
+    return set(r[0] for r in crs.fetchall())
+
+
 class MassiveObjectStoreWithCustomMDGenStoreTC(
         test_stores.NoHookRQLObjectStoreWithCustomMDGenStoreTC):
     configcls = PostgresApptestConfiguration
@@ -118,30 +123,36 @@
                                       'T name TN')[0]
             self.assertEqual(cnx.entity_from_eid(eid).cw_etype, etname)
 
-    def test_drop_index(self):
-        with self.admin_access.repo_cnx() as cnx:
-            store = MassiveObjectStore(cnx)
-            cnx.commit()
+    def test_index_not_dropped_by_init(self):
         with self.admin_access.repo_cnx() as cnx:
-            crs = cnx.system_sql('SELECT indexname FROM pg_indexes')
-            indexes = [r[0] for r in crs.fetchall()]
-        self.assertNotIn('entities_pkey', indexes)
-        self.assertNotIn('owned_by_relation_pkey', indexes)
-        self.assertNotIn('owned_by_relation_to_idx', indexes)
+            store = MassiveObjectStore(cnx)  # noqa
+            cnx.commit()
+            indexes = all_indexes(cnx)
+            self.assertIn('entities_pkey', indexes)
+            self.assertIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'),
+                          indexes)
+            self.assertIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'),
+                          indexes)
 
     def test_drop_index_recreation(self):
         with self.admin_access.repo_cnx() as cnx:
             store = MassiveObjectStore(cnx)
+
+            store._drop_constraints()
+            indexes = all_indexes(cnx)
+            self.assertNotIn('entities_pkey', indexes)
+            self.assertNotIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'),
+                             indexes)
+            self.assertNotIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'),
+                             indexes)
+
             store.finish()
-            cnx.commit()
-        with self.admin_access.repo_cnx() as cnx:
-            crs = cnx.system_sql('SELECT indexname FROM pg_indexes')
-            indexes = [r[0] for r in crs.fetchall()]
-        self.assertIn('entities_pkey', indexes)
-        self.assertIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'),
-                      indexes)
-        self.assertIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'),
-                      indexes)
+            indexes = all_indexes(cnx)
+            self.assertIn('entities_pkey', indexes)
+            self.assertIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'),
+                          indexes)
+            self.assertIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'),
+                          indexes)
 
     def test_eids_seq_range(self):
         with self.admin_access.repo_cnx() as cnx:
@@ -212,18 +223,6 @@
             store.flush()
         self.assertEqual(next(counter), 1)
 
-    def test_slave_mode_indexes(self):
-        with self.admin_access.repo_cnx() as cnx:
-            slave_store = MassiveObjectStore(cnx, slave_mode=True)
-        with self.admin_access.repo_cnx() as cnx:
-            crs = cnx.system_sql('SELECT indexname FROM pg_indexes')
-            indexes = [r[0] for r in crs.fetchall()]
-        self.assertIn('entities_pkey', indexes)
-        self.assertIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'),
-                      indexes)
-        self.assertIn(build_index_name('owned_by_relation', ['eid_from'], 'idx_'),
-                      indexes)
-
     def test_slave_mode_exception(self):
         with self.admin_access.repo_cnx() as cnx:
             slave_store = MassiveObjectStore(cnx, slave_mode=True)
@@ -249,8 +248,7 @@
             store.flush()
 
             # Check index
-            crs = cnx.system_sql('SELECT indexname FROM pg_indexes')
-            indexes = [r[0] for r in crs.fetchall()]
+            indexes = all_indexes(cnx)
             self.assertNotIn('entities_pkey', indexes)
             self.assertNotIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'),
                              indexes)
@@ -261,8 +259,7 @@
             store.finish()
 
             # Check index again
-            crs = cnx.system_sql('SELECT indexname FROM pg_indexes')
-            indexes = [r[0] for r in crs.fetchall()]
+            indexes = all_indexes(cnx)
             self.assertIn('entities_pkey', indexes)
             self.assertIn(build_index_name('owned_by_relation', ['eid_from', 'eid_to'], 'key_'),
                           indexes)