[c-c] Add a command to check database index
authorSylvain Thénault <sylvain.thenault@logilab.fr>
Fri, 17 Jun 2016 13:26:13 +0200 (2016-06-17)
changeset 11361 5a857bba1b79
parent 11360 49aca289134f
child 11362 ebe75d73acdd
[c-c] Add a command to check database index List indices expected by the schema but not in the database and vice-versa Closes #13822027
cubicweb/server/checkintegrity.py
cubicweb/server/serverctl.py
cubicweb/server/test/unittest_checkintegrity.py
--- a/cubicweb/server/checkintegrity.py	Wed Jun 22 07:57:13 2016 +0200
+++ b/cubicweb/server/checkintegrity.py	Fri Jun 17 13:26:13 2016 +0200
@@ -1,4 +1,4 @@
-# copyright 2003-2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# copyright 2003-2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
 #
 # This file is part of CubicWeb.
@@ -29,14 +29,20 @@
 
 from logilab.common.shellutils import ProgressBar
 
+from yams.constraints import UniqueConstraint
+
+from cubicweb.toolsutils import underline_title
 from cubicweb.schema import PURE_VIRTUAL_RTYPES, VIRTUAL_RTYPES, UNIQUE_CONSTRAINTS
 from cubicweb.server.sqlutils import SQL_PREFIX
+from cubicweb.server.schema2sql import iter_unique_index_names, build_index_name
+
 
 def notify_fixed(fix):
     if fix:
         sys.stderr.write(' [FIXED]')
     sys.stderr.write('\n')
 
+
 def has_eid(cnx, sqlcursor, eid, eids):
     """return true if the eid is a valid eid"""
     if eid in eids:
@@ -63,6 +69,7 @@
     eids[eid] = True
     return True
 
+
 # XXX move to yams?
 def etype_fti_containers(eschema, _done=None):
     if _done is None:
@@ -84,6 +91,7 @@
     else:
         yield eschema
 
+
 def reindex_entities(schema, cnx, withpb=True, etypes=None):
     """reindex all entities in the repository"""
     # deactivate modification_date hook since we don't want them
@@ -151,7 +159,6 @@
                 print('dunno how to fix, do it yourself')
 
 
-
 def check_text_index(schema, cnx, eids, fix=1):
     """check all entities registered in the text index"""
     print('Checking text index')
@@ -250,6 +257,7 @@
     sys.stderr.write(msg % (rtype, target, eid))
     notify_fixed(fix)
 
+
 def bad_inlined_msg(rtype, parent_eid, eid, fix):
     msg = ('  An inlined relation %s from %s to %s exists but the latter '
            'entity does not exist')
@@ -408,3 +416,93 @@
         cnx.rollback()
         reindex_entities(repo.schema, cnx, withpb=withpb)
         cnx.commit()
+
+
+SYSTEM_INDICES = {
+    # see cw/server/sources/native.py
+    'entities_type_idx': ('entities', 'type'),
+    'entities_extid_idx': ('entities', 'extid'),
+    'transactions_tx_time_idx': ('transactions', 'tx_time'),
+    'transactions_tx_user_idx': ('transactions', 'tx_user'),
+    'tx_entity_actions_txa_action_idx': ('tx_entity_actions', 'txa_action'),
+    'tx_entity_actions_txa_public_idx': ('tx_entity_actions', 'txa_public'),
+    'tx_entity_actions_eid_idx': ('tx_entity_actions', 'txa_eid'),
+    'tx_entity_actions_etype_idx': ('tx_entity_actions', 'txa_etype'),
+    'tx_entity_actions_tx_uuid_idx': ('tx_entity_actions', 'tx_uuid'),
+    'tx_relation_actions_txa_action_idx': ('tx_relation_actions', 'txa_action'),
+    'tx_relation_actions_txa_public_idx': ('tx_relation_actions', 'txa_public'),
+    'tx_relation_actions_eid_from_idx': ('tx_relation_actions', 'eid_from'),
+    'tx_relation_actions_eid_to_idx': ('tx_relation_actions', 'eid_to'),
+    'tx_relation_actions_tx_uuid_idx': ('tx_relation_actions', 'tx_uuid'),
+}
+
+
+def check_indexes(cnx):
+    """Check indexes of a system database: output missing expected indexes as well as unexpected ones.
+
+    Return 0 if there is no differences, else 1.
+    """
+    source = cnx.repo.system_source
+    dbh = source.dbhelper
+    schema = cnx.repo.schema
+    schema_indices = SYSTEM_INDICES.copy()
+    if source.dbdriver == 'postgres':
+        schema_indices.update({'appears_words_idx': ('appears', 'words'),
+                               'moved_entities_extid_key': ('moved_entities', 'extid')})
+        index_filter = lambda idx: not (idx.startswith('pg_') or idx.endswith('_pkey'))
+    else:
+        schema_indices.update({'appears_uid': ('appears', 'uid'),
+                               'appears_word_id': ('appears', 'word_id')})
+        index_filter = lambda idx: not idx.startswith('sqlite_')
+    db_indices = set(idx for idx in dbh.list_indices(cnx.cnxset.cu)
+                     if index_filter(idx))
+    for rschema in schema.relations():
+        if rschema.rule or rschema in PURE_VIRTUAL_RTYPES:
+            continue  # computed relation
+        if rschema.final or rschema.inlined:
+            for rdef in rschema.rdefs.values():
+                table = 'cw_{0}'.format(rdef.subject)
+                column = 'cw_{0}'.format(rdef.rtype)
+                if any(isinstance(cstr, UniqueConstraint) for cstr in rdef.constraints):
+                    schema_indices[dbh._index_name(table, column, unique=True)] = (
+                        table, [column])
+                if rschema.inlined or rdef.indexed:
+                    schema_indices[dbh._index_name(table, column)] = (table, [column])
+        else:
+            table = '{0}_relation'.format(rschema)
+            if source.dbdriver == 'postgres':
+                # index built after the primary key constraint
+                schema_indices[build_index_name(table, ['eid_from', 'eid_to'], 'key_')] = (
+                    table, ['eid_from', 'eid_to'])
+            schema_indices[build_index_name(table, ['eid_from'], 'idx_')] = (
+                table, ['eid_from'])
+            schema_indices[build_index_name(table, ['eid_to'], 'idx_')] = (
+                table, ['eid_to'])
+    for eschema in schema.entities():
+        if eschema.final:
+            continue
+        table = 'cw_{0}'.format(eschema)
+        for columns, index_name in iter_unique_index_names(eschema):
+            schema_indices[index_name] = (table, columns)
+
+    missing_indices = set(schema_indices) - db_indices
+    if missing_indices:
+        print(underline_title('Missing indices'))
+        print('index expected by the schema but not found in the database:\n')
+        missing = ['{0} ON {1[0]} {1[1]}'.format(idx, schema_indices[idx])
+                   for idx in missing_indices]
+        print('\n'.join(sorted(missing)))
+        print()
+        status = 1
+    additional_indices = db_indices - set(schema_indices)
+    if additional_indices:
+        print(underline_title('Additional indices'))
+        print('index in the database but not expected by the schema:\n')
+        print('\n'.join(sorted(additional_indices)))
+        print()
+        status = 1
+    if not (missing_indices or additional_indices):
+        print('Everything is Ok')
+        status = 0
+
+    return status
--- a/cubicweb/server/serverctl.py	Wed Jun 22 07:57:13 2016 +0200
+++ b/cubicweb/server/serverctl.py	Fri Jun 17 13:26:13 2016 +0200
@@ -944,6 +944,25 @@
                   self.config.autofix)
 
 
+class DBIndexSanityCheckCommand(Command):
+    """Check database indices of an instance.
+
+    <instance>
+      identifier of the instance to check
+    """
+    arguments = '<instance>'
+    name = 'db-check-index'
+    min_args = 1
+
+    def run(self, args):
+        from cubicweb.server.checkintegrity import check_indexes
+        config = ServerConfiguration.config_for(args[0])
+        repo, cnx = repo_cnx(config)
+        with cnx:
+            status = check_indexes(cnx)
+        sys.exit(status)
+
+
 class RebuildFTICommand(Command):
     """Rebuild the full-text index of the system database of an instance.
 
@@ -1075,7 +1094,7 @@
 
 for cmdclass in (CreateInstanceDBCommand, InitInstanceCommand,
                  GrantUserOnInstanceCommand, ResetAdminPasswordCommand,
-                 DBDumpCommand, DBRestoreCommand, DBCopyCommand,
+                 DBDumpCommand, DBRestoreCommand, DBCopyCommand, DBIndexSanityCheckCommand,
                  AddSourceCommand, CheckRepositoryCommand, RebuildFTICommand,
                  SynchronizeSourceCommand, SchemaDiffCommand,
                  ):
--- a/cubicweb/server/test/unittest_checkintegrity.py	Wed Jun 22 07:57:13 2016 +0200
+++ b/cubicweb/server/test/unittest_checkintegrity.py	Fri Jun 17 13:26:13 2016 +0200
@@ -1,4 +1,4 @@
-# copyright 2003-2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# copyright 2003-2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
 #
 # This file is part of CubicWeb.
@@ -17,6 +17,7 @@
 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
 
 import sys
+import unittest
 
 from six import PY2
 if PY2:
@@ -24,13 +25,13 @@
 else:
     from io import StringIO
 
-from logilab.common.testlib import TestCase, unittest_main
-from cubicweb.devtools import get_test_db_handler, TestServerConfiguration
+from cubicweb.devtools import (PostgresApptestConfiguration, TestServerConfiguration,
+                               get_test_db_handler, startpgcluster, stoppgcluster)
+from cubicweb.devtools.testlib import CubicWebTC
+from cubicweb.server.checkintegrity import check, check_indexes, reindex_entities
 
 
-from cubicweb.server.checkintegrity import check, reindex_entities
-
-class CheckIntegrityTC(TestCase):
+class CheckIntegrityTC(unittest.TestCase):
 
     def setUp(self):
         handler = get_test_db_handler(TestServerConfiguration('data', __file__))
@@ -66,5 +67,32 @@
             self.assertTrue(cnx.execute('Any X WHERE X has_text "tutu"'))
             self.assertTrue(cnx.execute('Any X WHERE X has_text "toto"'))
 
+
+class SqliteCheckIndexesTC(CubicWebTC):
+
+    def test_check_indexes(self):
+        with self.admin_access.repo_cnx() as cnx:
+            sys.stdout = stream = StringIO()
+            try:
+                status = check_indexes(cnx)
+            finally:
+                sys.stdout = sys.__stdout__
+            self.assertEqual(status, 0, stream.getvalue())
+
+
+class PGCheckIndexesTC(SqliteCheckIndexesTC):
+    configcls = PostgresApptestConfiguration
+
+    @classmethod
+    def setUpClass(cls):
+        startpgcluster(__file__)
+        super(PGCheckIndexesTC, cls).setUpClass()
+
+    @classmethod
+    def tearDownClass(cls):
+        stoppgcluster(__file__)
+        super(PGCheckIndexesTC, cls).tearDownClass()
+
+
 if __name__ == '__main__':
-    unittest_main()
+    unittest.main()