server/checkintegrity.py
changeset 11057 0b59724cb3f2
parent 11052 058bb3dc685f
child 11058 23eb30449fe5
equal deleted inserted replaced
11052:058bb3dc685f 11057:0b59724cb3f2
     1 # copyright 2003-2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
       
     2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
       
     3 #
       
     4 # This file is part of CubicWeb.
       
     5 #
       
     6 # CubicWeb is free software: you can redistribute it and/or modify it under the
       
     7 # terms of the GNU Lesser General Public License as published by the Free
       
     8 # Software Foundation, either version 2.1 of the License, or (at your option)
       
     9 # any later version.
       
    10 #
       
    11 # CubicWeb is distributed in the hope that it will be useful, but WITHOUT
       
    12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
       
    13 # FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
       
    14 # details.
       
    15 #
       
    16 # You should have received a copy of the GNU Lesser General Public License along
       
    17 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
       
    18 """Integrity checking tool for instances:
       
    19 
       
    20 * integrity of a CubicWeb repository. Hum actually only the system database is
       
    21   checked.
       
    22 """
       
    23 from __future__ import print_function
       
    24 
       
    25 __docformat__ = "restructuredtext en"
       
    26 
       
    27 import sys
       
    28 from datetime import datetime
       
    29 
       
    30 from logilab.common.shellutils import ProgressBar
       
    31 
       
    32 from cubicweb.schema import PURE_VIRTUAL_RTYPES, VIRTUAL_RTYPES, UNIQUE_CONSTRAINTS
       
    33 from cubicweb.server.sqlutils import SQL_PREFIX
       
    34 
       
    35 def notify_fixed(fix):
       
    36     if fix:
       
    37         sys.stderr.write(' [FIXED]')
       
    38     sys.stderr.write('\n')
       
    39 
       
    40 def has_eid(cnx, sqlcursor, eid, eids):
       
    41     """return true if the eid is a valid eid"""
       
    42     if eid in eids:
       
    43         return eids[eid]
       
    44     sqlcursor.execute('SELECT type FROM entities WHERE eid=%s' % eid)
       
    45     try:
       
    46         etype = sqlcursor.fetchone()[0]
       
    47     except Exception:
       
    48         eids[eid] = False
       
    49         return False
       
    50     if etype not in cnx.vreg.schema:
       
    51         eids[eid] = False
       
    52         return False
       
    53     sqlcursor.execute('SELECT * FROM %s%s WHERE %seid=%s' % (SQL_PREFIX, etype,
       
    54                                                              SQL_PREFIX, eid))
       
    55     result = sqlcursor.fetchall()
       
    56     if len(result) == 0:
       
    57         eids[eid] = False
       
    58         return False
       
    59     elif len(result) > 1:
       
    60         msg = ('  More than one entity with eid %s exists in source!\n'
       
    61                '  WARNING : Unable to fix this, do it yourself!\n')
       
    62         sys.stderr.write(msg % eid)
       
    63     eids[eid] = True
       
    64     return True
       
    65 
       
    66 # XXX move to yams?
       
    67 def etype_fti_containers(eschema, _done=None):
       
    68     if _done is None:
       
    69         _done = set()
       
    70     _done.add(eschema)
       
    71     containers = tuple(eschema.fulltext_containers())
       
    72     if containers:
       
    73         for rschema, target in containers:
       
    74             if target == 'object':
       
    75                 targets = rschema.objects(eschema)
       
    76             else:
       
    77                 targets = rschema.subjects(eschema)
       
    78             for targeteschema in targets:
       
    79                 if targeteschema in _done:
       
    80                     continue
       
    81                 _done.add(targeteschema)
       
    82                 for container in etype_fti_containers(targeteschema, _done):
       
    83                     yield container
       
    84     else:
       
    85         yield eschema
       
    86 
       
    87 def reindex_entities(schema, cnx, withpb=True, etypes=None):
       
    88     """reindex all entities in the repository"""
       
    89     # deactivate modification_date hook since we don't want them
       
    90     # to be updated due to the reindexation
       
    91     repo = cnx.repo
       
    92     dbhelper = repo.system_source.dbhelper
       
    93     cursor = cnx.cnxset.cu
       
    94     if not dbhelper.has_fti_table(cursor):
       
    95         print('no text index table')
       
    96         dbhelper.init_fti(cursor)
       
    97     repo.system_source.do_fti = True  # ensure full-text indexation is activated
       
    98     if etypes is None:
       
    99         print('Reindexing entities')
       
   100         etypes = set()
       
   101         for eschema in schema.entities():
       
   102             if eschema.final:
       
   103                 continue
       
   104             indexable_attrs = tuple(eschema.indexable_attributes()) # generator
       
   105             if not indexable_attrs:
       
   106                 continue
       
   107             for container in etype_fti_containers(eschema):
       
   108                 etypes.add(container)
       
   109         # clear fti table first
       
   110         cnx.system_sql('DELETE FROM %s' % dbhelper.fti_table)
       
   111     else:
       
   112         print('Reindexing entities of type %s' % \
       
   113               ', '.join(sorted(str(e) for e in etypes)))
       
   114         # clear fti table first. Use subquery for sql compatibility
       
   115         cnx.system_sql("DELETE FROM %s WHERE EXISTS(SELECT 1 FROM ENTITIES "
       
   116                        "WHERE eid=%s AND type IN (%s))" % (
       
   117                            dbhelper.fti_table, dbhelper.fti_uid_attr,
       
   118                            ','.join("'%s'" % etype for etype in etypes)))
       
   119     if withpb:
       
   120         pb = ProgressBar(len(etypes) + 1)
       
   121         pb.update()
       
   122     # reindex entities by generating rql queries which set all indexable
       
   123     # attribute to their current value
       
   124     source = repo.system_source
       
   125     for eschema in etypes:
       
   126         etype_class = cnx.vreg['etypes'].etype_class(str(eschema))
       
   127         for rset in etype_class.cw_fti_index_rql_limit(cnx):
       
   128             source.fti_index_entities(cnx, rset.entities())
       
   129             # clear entity cache to avoid high memory consumption on big tables
       
   130             cnx.drop_entity_cache()
       
   131         if withpb:
       
   132             pb.update()
       
   133     if withpb:
       
   134         pb.finish()
       
   135 
       
   136 
       
   137 def check_schema(schema, cnx, eids, fix=1):
       
   138     """check serialized schema"""
       
   139     print('Checking serialized schema')
       
   140     rql = ('Any COUNT(X),RN,SN,ON,CTN GROUPBY RN,SN,ON,CTN ORDERBY 1 '
       
   141            'WHERE X is CWConstraint, R constrained_by X, '
       
   142            'R relation_type RT, RT name RN, R from_entity ST, ST name SN, '
       
   143            'R to_entity OT, OT name ON, X cstrtype CT, CT name CTN')
       
   144     for count, rn, sn, on, cstrname in cnx.execute(rql):
       
   145         if count == 1:
       
   146             continue
       
   147         if cstrname in UNIQUE_CONSTRAINTS:
       
   148             print("ERROR: got %s %r constraints on relation %s.%s.%s" % (
       
   149                 count, cstrname, sn, rn, on))
       
   150             if fix:
       
   151                 print('dunno how to fix, do it yourself')
       
   152 
       
   153 
       
   154 
       
   155 def check_text_index(schema, cnx, eids, fix=1):
       
   156     """check all entities registered in the text index"""
       
   157     print('Checking text index')
       
   158     msg = '  Entity with eid %s exists in the text index but in no source (autofix will remove from text index)'
       
   159     cursor = cnx.system_sql('SELECT uid FROM appears;')
       
   160     for row in cursor.fetchall():
       
   161         eid = row[0]
       
   162         if not has_eid(cnx, cursor, eid, eids):
       
   163             sys.stderr.write(msg % eid)
       
   164             if fix:
       
   165                 cnx.system_sql('DELETE FROM appears WHERE uid=%s;' % eid)
       
   166             notify_fixed(fix)
       
   167 
       
   168 
       
   169 def check_entities(schema, cnx, eids, fix=1):
       
   170     """check all entities registered in the repo system table"""
       
   171     print('Checking entities system table')
       
   172     # system table but no source
       
   173     msg = '  Entity %s with eid %s exists in the system table but in no source (autofix will delete the entity)'
       
   174     cursor = cnx.system_sql('SELECT eid,type FROM entities;')
       
   175     for row in cursor.fetchall():
       
   176         eid, etype = row
       
   177         if not has_eid(cnx, cursor, eid, eids):
       
   178             sys.stderr.write(msg % (etype, eid))
       
   179             if fix:
       
   180                 cnx.system_sql('DELETE FROM entities WHERE eid=%s;' % eid)
       
   181             notify_fixed(fix)
       
   182     # source in entities, but no relation cw_source
       
   183     # XXX this (get_versions) requires a second connection to the db when we already have one open
       
   184     applcwversion = cnx.repo.get_versions().get('cubicweb')
       
   185     if applcwversion >= (3, 13, 1): # entities.asource appeared in 3.13.1
       
   186         cursor = cnx.system_sql('SELECT e.eid FROM entities as e, cw_CWSource as s '
       
   187                                     'WHERE s.cw_name=e.asource AND '
       
   188                                     'NOT EXISTS(SELECT 1 FROM cw_source_relation as cs '
       
   189                                     '  WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid) '
       
   190                                     'ORDER BY e.eid')
       
   191         msg = ('  Entity with eid %s refers to source in entities table, '
       
   192                'but is missing relation cw_source (autofix will create the relation)\n')
       
   193         for row in cursor.fetchall():
       
   194             sys.stderr.write(msg % row[0])
       
   195         if fix:
       
   196             cnx.system_sql('INSERT INTO cw_source_relation (eid_from, eid_to) '
       
   197                                'SELECT e.eid, s.cw_eid FROM entities as e, cw_CWSource as s '
       
   198                                'WHERE s.cw_name=e.asource AND NOT EXISTS(SELECT 1 FROM cw_source_relation as cs '
       
   199                                '  WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid)')
       
   200             notify_fixed(True)
       
   201     # inconsistencies for 'is'
       
   202     msg = '  %s #%s is missing relation "is" (autofix will create the relation)\n'
       
   203     cursor = cnx.system_sql('SELECT e.type, e.eid FROM entities as e, cw_CWEType as s '
       
   204                                 'WHERE s.cw_name=e.type AND NOT EXISTS(SELECT 1 FROM is_relation as cs '
       
   205                                 '  WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid) '
       
   206                                 'ORDER BY e.eid')
       
   207     for row in cursor.fetchall():
       
   208         sys.stderr.write(msg % tuple(row))
       
   209     if fix:
       
   210         cnx.system_sql('INSERT INTO is_relation (eid_from, eid_to) '
       
   211                            'SELECT e.eid, s.cw_eid FROM entities as e, cw_CWEType as s '
       
   212                            'WHERE s.cw_name=e.type AND NOT EXISTS(SELECT 1 FROM is_relation as cs '
       
   213                            '  WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid)')
       
   214         notify_fixed(True)
       
   215     # inconsistencies for 'is_instance_of'
       
   216     msg = '  %s #%s is missing relation "is_instance_of" (autofix will create the relation)\n'
       
   217     cursor = cnx.system_sql('SELECT e.type, e.eid FROM entities as e, cw_CWEType as s '
       
   218                                 'WHERE s.cw_name=e.type AND NOT EXISTS(SELECT 1 FROM is_instance_of_relation as cs '
       
   219                                 '  WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid) '
       
   220                                 'ORDER BY e.eid')
       
   221     for row in cursor.fetchall():
       
   222         sys.stderr.write(msg % tuple(row))
       
   223     if fix:
       
   224         cnx.system_sql('INSERT INTO is_instance_of_relation (eid_from, eid_to) '
       
   225                            'SELECT e.eid, s.cw_eid FROM entities as e, cw_CWEType as s '
       
   226                            'WHERE s.cw_name=e.type AND NOT EXISTS(SELECT 1 FROM is_instance_of_relation as cs '
       
   227                            '  WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid)')
       
   228         notify_fixed(True)
       
   229     print('Checking entities tables')
       
   230     msg = '  Entity with eid %s exists in the %s table but not in the system table (autofix will delete the entity)'
       
   231     for eschema in schema.entities():
       
   232         if eschema.final:
       
   233             continue
       
   234         table = SQL_PREFIX + eschema.type
       
   235         column = SQL_PREFIX +  'eid'
       
   236         cursor = cnx.system_sql('SELECT %s FROM %s;' % (column, table))
       
   237         for row in cursor.fetchall():
       
   238             eid = row[0]
       
   239             # eids is full since we have fetched everything from the entities table,
       
   240             # no need to call has_eid
       
   241             if not eid in eids or not eids[eid]:
       
   242                 sys.stderr.write(msg % (eid, eschema.type))
       
   243                 if fix:
       
   244                     cnx.system_sql('DELETE FROM %s WHERE %s=%s;' % (table, column, eid))
       
   245                 notify_fixed(fix)
       
   246 
       
   247 
       
   248 def bad_related_msg(rtype, target, eid, fix):
       
   249     msg = '  A relation %s with %s eid %s exists but no such entity in sources'
       
   250     sys.stderr.write(msg % (rtype, target, eid))
       
   251     notify_fixed(fix)
       
   252 
       
   253 def bad_inlined_msg(rtype, parent_eid, eid, fix):
       
   254     msg = ('  An inlined relation %s from %s to %s exists but the latter '
       
   255            'entity does not exist')
       
   256     sys.stderr.write(msg % (rtype, parent_eid, eid))
       
   257     notify_fixed(fix)
       
   258 
       
   259 
       
   260 def check_relations(schema, cnx, eids, fix=1):
       
   261     """check that eids referenced by relations are registered in the repo system
       
   262     table
       
   263     """
       
   264     print('Checking relations')
       
   265     for rschema in schema.relations():
       
   266         if rschema.final or rschema.type in PURE_VIRTUAL_RTYPES:
       
   267             continue
       
   268         if rschema.inlined:
       
   269             for subjtype in rschema.subjects():
       
   270                 table = SQL_PREFIX + str(subjtype)
       
   271                 column = SQL_PREFIX +  str(rschema)
       
   272                 sql = 'SELECT cw_eid,%s FROM %s WHERE %s IS NOT NULL;' % (
       
   273                     column, table, column)
       
   274                 cursor = cnx.system_sql(sql)
       
   275                 for row in cursor.fetchall():
       
   276                     parent_eid, eid = row
       
   277                     if not has_eid(cnx, cursor, eid, eids):
       
   278                         bad_inlined_msg(rschema, parent_eid, eid, fix)
       
   279                         if fix:
       
   280                             sql = 'UPDATE %s SET %s=NULL WHERE %s=%s;' % (
       
   281                                 table, column, column, eid)
       
   282                             cnx.system_sql(sql)
       
   283             continue
       
   284         try:
       
   285             cursor = cnx.system_sql('SELECT eid_from FROM %s_relation;' % rschema)
       
   286         except Exception as ex:
       
   287             # usually because table doesn't exist
       
   288             print('ERROR', ex)
       
   289             continue
       
   290         for row in cursor.fetchall():
       
   291             eid = row[0]
       
   292             if not has_eid(cnx, cursor, eid, eids):
       
   293                 bad_related_msg(rschema, 'subject', eid, fix)
       
   294                 if fix:
       
   295                     sql = 'DELETE FROM %s_relation WHERE eid_from=%s;' % (
       
   296                         rschema, eid)
       
   297                     cnx.system_sql(sql)
       
   298         cursor = cnx.system_sql('SELECT eid_to FROM %s_relation;' % rschema)
       
   299         for row in cursor.fetchall():
       
   300             eid = row[0]
       
   301             if not has_eid(cnx, cursor, eid, eids):
       
   302                 bad_related_msg(rschema, 'object', eid, fix)
       
   303                 if fix:
       
   304                     sql = 'DELETE FROM %s_relation WHERE eid_to=%s;' % (
       
   305                         rschema, eid)
       
   306                     cnx.system_sql(sql)
       
   307 
       
   308 
       
   309 def check_mandatory_relations(schema, cnx, eids, fix=1):
       
   310     """check entities missing some mandatory relation"""
       
   311     print('Checking mandatory relations')
       
   312     msg = '%s #%s is missing mandatory %s relation %s (autofix will delete the entity)'
       
   313     for rschema in schema.relations():
       
   314         if rschema.final or rschema in PURE_VIRTUAL_RTYPES or rschema in ('is', 'is_instance_of'):
       
   315             continue
       
   316         smandatory = set()
       
   317         omandatory = set()
       
   318         for rdef in rschema.rdefs.values():
       
   319             if rdef.cardinality[0] in '1+':
       
   320                 smandatory.add(rdef.subject)
       
   321             if rdef.cardinality[1] in '1+':
       
   322                 omandatory.add(rdef.object)
       
   323         for role, etypes in (('subject', smandatory), ('object', omandatory)):
       
   324             for etype in etypes:
       
   325                 if role == 'subject':
       
   326                     rql = 'Any X WHERE NOT X %s Y, X is %s' % (rschema, etype)
       
   327                 else:
       
   328                     rql = 'Any X WHERE NOT Y %s X, X is %s' % (rschema, etype)
       
   329                 for entity in cnx.execute(rql).entities():
       
   330                     sys.stderr.write(msg % (entity.cw_etype, entity.eid, role, rschema))
       
   331                     if fix:
       
   332                         #if entity.cw_describe()['source']['uri'] == 'system': XXX
       
   333                         entity.cw_delete() # XXX this is BRUTAL!
       
   334                     notify_fixed(fix)
       
   335 
       
   336 
       
   337 def check_mandatory_attributes(schema, cnx, eids, fix=1):
       
   338     """check for entities stored in the system source missing some mandatory
       
   339     attribute
       
   340     """
       
   341     print('Checking mandatory attributes')
       
   342     msg = '%s #%s is missing mandatory attribute %s (autofix will delete the entity)'
       
   343     for rschema in schema.relations():
       
   344         if not rschema.final or rschema in VIRTUAL_RTYPES:
       
   345             continue
       
   346         for rdef in rschema.rdefs.values():
       
   347             if rdef.cardinality[0] in '1+':
       
   348                 rql = 'Any X WHERE X %s NULL, X is %s, X cw_source S, S name "system"' % (
       
   349                     rschema, rdef.subject)
       
   350                 for entity in cnx.execute(rql).entities():
       
   351                     sys.stderr.write(msg % (entity.cw_etype, entity.eid, rschema))
       
   352                     if fix:
       
   353                         entity.cw_delete()
       
   354                     notify_fixed(fix)
       
   355 
       
   356 
       
   357 def check_metadata(schema, cnx, eids, fix=1):
       
   358     """check entities has required metadata
       
   359 
       
   360     FIXME: rewrite using RQL queries ?
       
   361     """
       
   362     print('Checking metadata')
       
   363     cursor = cnx.system_sql("SELECT DISTINCT type FROM entities;")
       
   364     eidcolumn = SQL_PREFIX + 'eid'
       
   365     msg = '  %s with eid %s has no %s (autofix will set it to now)'
       
   366     for etype, in cursor.fetchall():
       
   367         if etype not in cnx.vreg.schema:
       
   368             sys.stderr.write('entities table references unknown type %s\n' %
       
   369                              etype)
       
   370             if fix:
       
   371                 cnx.system_sql("DELETE FROM entities WHERE type = %(type)s",
       
   372                                    {'type': etype})
       
   373             continue
       
   374         table = SQL_PREFIX + etype
       
   375         for rel, default in ( ('creation_date', datetime.utcnow()),
       
   376                               ('modification_date', datetime.utcnow()), ):
       
   377             column = SQL_PREFIX + rel
       
   378             cursor = cnx.system_sql("SELECT %s FROM %s WHERE %s is NULL"
       
   379                                         % (eidcolumn, table, column))
       
   380             for eid, in cursor.fetchall():
       
   381                 sys.stderr.write(msg % (etype, eid, rel))
       
   382                 if fix:
       
   383                     cnx.system_sql("UPDATE %s SET %s=%%(v)s WHERE %s=%s ;"
       
   384                                        % (table, column, eidcolumn, eid),
       
   385                                        {'v': default})
       
   386                 notify_fixed(fix)
       
   387 
       
   388 
       
   389 def check(repo, cnx, checks, reindex, fix, withpb=True):
       
   390     """check integrity of instance's repository,
       
   391     using given user and password to locally connect to the repository
       
   392     (no running cubicweb server needed)
       
   393     """
       
   394     # yo, launch checks
       
   395     if checks:
       
   396         eids_cache = {}
       
   397         with cnx.security_enabled(read=False, write=False): # ensure no read security
       
   398             for check in checks:
       
   399                 check_func = globals()['check_%s' % check]
       
   400                 check_func(repo.schema, cnx, eids_cache, fix=fix)
       
   401         if fix:
       
   402             cnx.commit()
       
   403         else:
       
   404             print()
       
   405         if not fix:
       
   406             print('WARNING: Diagnostic run, nothing has been corrected')
       
   407     if reindex:
       
   408         cnx.rollback()
       
   409         reindex_entities(repo.schema, cnx, withpb=withpb)
       
   410         cnx.commit()