server/checkintegrity.py
changeset 0 b97547f5f1fa
child 380 06e7f2932afe
equal deleted inserted replaced
-1:000000000000 0:b97547f5f1fa
       
     1 """Check integrity of a CubicWeb repository. Hum actually only the system database
       
     2 is checked.
       
     3 
       
     4 :organization: Logilab
       
     5 :copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
       
     6 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
       
     7 """
       
     8 __docformat__ = "restructuredtext en"
       
     9 
       
    10 import sys
       
    11 
       
    12 from mx.DateTime import now
       
    13 from logilab.common.shellutils import ProgressBar
       
    14 
       
    15 def has_eid(sqlcursor, eid, eids):
       
    16     """return true if the eid is a valid eid"""
       
    17     if eids.has_key(eid):
       
    18         return eids[eid]
       
    19     sqlcursor.execute('SELECT type, source FROM entities WHERE eid=%s' % eid)
       
    20     try:
       
    21         etype, source = sqlcursor.fetchone()
       
    22     except:
       
    23         eids[eid] = False
       
    24         return False
       
    25     if source and source != 'system':
       
    26         # XXX what to do...
       
    27         eids[eid] = True
       
    28         return True
       
    29     sqlcursor.execute('SELECT * FROM %s WHERE eid=%s' % (etype, eid))
       
    30     result = sqlcursor.fetchall()
       
    31     if len(result) == 0:
       
    32         eids[eid] = False
       
    33         return False
       
    34     elif len(result) > 1:
       
    35         msg = '  More than one entity with eid %s exists in source !'
       
    36         print >> sys.stderr, msg % eid
       
    37         print >> sys.stderr, '  WARNING : Unable to fix this, do it yourself !'
       
    38     eids[eid] = True
       
    39     return True
       
    40 
       
    41 # XXX move to yams?
       
    42 def etype_fti_containers(eschema, _done=None):
       
    43     if _done is None:
       
    44         _done = set()
       
    45     _done.add(eschema)
       
    46     containers = tuple(eschema.fulltext_containers())
       
    47     if containers:
       
    48         for rschema, target in containers:
       
    49             if target == 'object':
       
    50                 targets = rschema.objects(eschema)
       
    51             else:
       
    52                 targets = rschema.subjects(eschema)
       
    53             for targeteschema in targets:
       
    54                 if targeteschema in _done:
       
    55                     continue
       
    56                 _done.add(targeteschema)
       
    57                 for container in etype_fti_containers(targeteschema, _done):
       
    58                     yield container
       
    59     else:
       
    60         yield eschema
       
    61     
       
    62 def reindex_entities(schema, session):
       
    63     """reindex all entities in the repository"""
       
    64     # deactivate modification_date hook since we don't want them
       
    65     # to be updated due to the reindexation
       
    66     from cubicweb.server.hooks import (setmtime_before_update_entity,
       
    67                                        uniquecstrcheck_before_modification)
       
    68     from cubicweb.server.repository import FTIndexEntityOp
       
    69     repo = session.repo
       
    70     repo.hm.unregister_hook(setmtime_before_update_entity,
       
    71                             'before_update_entity', '')
       
    72     repo.hm.unregister_hook(uniquecstrcheck_before_modification,
       
    73                             'before_update_entity', '')
       
    74     etypes = set()
       
    75     for eschema in schema.entities():
       
    76         if eschema.is_final():
       
    77             continue
       
    78         indexable_attrs = tuple(eschema.indexable_attributes()) # generator
       
    79         if not indexable_attrs:
       
    80             continue
       
    81         for container in etype_fti_containers(eschema):
       
    82             etypes.add(container)
       
    83     print 'Reindexing entities of type %s' % \
       
    84           ', '.join(sorted(str(e) for e in etypes))
       
    85     pb = ProgressBar(len(etypes) + 1)
       
    86     # first monkey patch Entity.check to disable validation
       
    87     from cubicweb.common.entity import Entity
       
    88     _check = Entity.check
       
    89     Entity.check = lambda self, creation=False: True
       
    90     # clear fti table first
       
    91     session.system_sql('DELETE FROM %s' % session.repo.system_source.dbhelper.fti_table)
       
    92     pb.update()
       
    93     # reindex entities by generating rql queries which set all indexable
       
    94     # attribute to their current value
       
    95     for eschema in etypes:
       
    96         for entity in session.execute('Any X WHERE X is %s' % eschema).entities():
       
    97             FTIndexEntityOp(session, entity=entity)
       
    98         pb.update()
       
    99     # restore Entity.check
       
   100     Entity.check = _check
       
   101 
       
   102     
       
   103 def check_schema(session):
       
   104     """check serialized schema"""
       
   105     print 'Checking serialized schema'
       
   106     unique_constraints = ('SizeConstraint', 'FormatConstraint',
       
   107                           'VocabularyConstraint', 'RQLConstraint',
       
   108                           'RQLVocabularyConstraint')
       
   109     rql = ('Any COUNT(X),RN,EN,ECTN GROUPBY RN,EN,ECTN ORDERBY 1 '
       
   110            'WHERE X is Econstraint, R constrained_by X, '
       
   111            'R relation_type RT, R from_entity ET, RT name RN, '
       
   112            'ET name EN, X cstrtype ECT, ECT name ECTN')
       
   113     for count, rn, en, cstrname in session.execute(rql):
       
   114         if count == 1:
       
   115             continue
       
   116         if cstrname in unique_constraints:
       
   117             print "ERROR: got %s %r constraints on relation %s.%s" % (
       
   118                 count, cstrname, en, rn)
       
   119 
       
   120 
       
   121     
       
   122 def check_text_index(schema, session, eids, fix=1):
       
   123     """check all entities registered in the text index"""
       
   124     print 'Checking text index'
       
   125     cursor = session.system_sql('SELECT uid FROM appears;')
       
   126     for row in cursor.fetchall():
       
   127         eid = row[0]
       
   128         if not has_eid(cursor, eid, eids):
       
   129             msg = '  Entity with eid %s exists in the text index but in no source'
       
   130             print >> sys.stderr, msg % eid,
       
   131             if fix:
       
   132                 session.system_sql('DELETE FROM appears WHERE uid=%s;' % eid)
       
   133                 print >> sys.stderr, ' [FIXED]'
       
   134             else:
       
   135                 print >> sys.stderr
       
   136 
       
   137 
       
   138 def check_entities(schema, session, eids, fix=1):
       
   139     """check all entities registered in the repo system table"""
       
   140     print 'Checking entities system table'
       
   141     cursor = session.system_sql('SELECT eid FROM entities;')
       
   142     for row in cursor.fetchall():
       
   143         eid = row[0]
       
   144         if not has_eid(cursor, eid, eids):
       
   145             msg = '  Entity with eid %s exists in the system table but in no source'
       
   146             print >> sys.stderr, msg % eid,
       
   147             if fix:
       
   148                 session.system_sql('DELETE FROM entities WHERE eid=%s;' % eid)
       
   149                 print >> sys.stderr, ' [FIXED]'
       
   150             else:
       
   151                 print >> sys.stderr
       
   152     print 'Checking entities tables'
       
   153     for eschema in schema.entities():
       
   154         if eschema.is_final():
       
   155             continue
       
   156         cursor = session.system_sql('SELECT eid FROM %s;' % eschema.type)
       
   157         for row in cursor.fetchall():
       
   158             eid = row[0]
       
   159             # eids is full since we have fetched everyting from the entities table,
       
   160             # no need to call has_eid
       
   161             if not eid in eids or not eids[eid]:
       
   162                 msg = '  Entity with eid %s exists in the %s table but not in the system table'
       
   163                 print >> sys.stderr, msg % (eid, eschema.type),
       
   164                 if fix:
       
   165                     session.system_sql('DELETE FROM %s WHERE eid=%s;' % (eschema.type, eid))
       
   166                     print >> sys.stderr, ' [FIXED]'
       
   167                 else:
       
   168                     print >> sys.stderr
       
   169                 
       
   170             
       
   171 def bad_related_msg(rtype, target, eid, fix):
       
   172     msg = '  A relation %s with %s eid %s exists but no such entity in sources'
       
   173     print >> sys.stderr, msg % (rtype, target, eid),
       
   174     if fix:
       
   175         print >> sys.stderr, ' [FIXED]'
       
   176     else:
       
   177         print >> sys.stderr
       
   178     
       
   179     
       
   180 def check_relations(schema, session, eids, fix=1):
       
   181     """check all relations registered in the repo system table"""
       
   182     print 'Checking relations'
       
   183     for rschema in schema.relations():
       
   184         if rschema.is_final():
       
   185             continue
       
   186         rtype = rschema.type
       
   187         if rtype == 'identity':
       
   188             continue
       
   189         if rschema.inlined:
       
   190             for subjtype in rschema.subjects():
       
   191                 cursor = session.system_sql('SELECT %s FROM %s WHERE %s IS NOT NULL;'
       
   192                                             % (rtype, subjtype, rtype))
       
   193                 for row in cursor.fetchall():
       
   194                     eid = row[0]
       
   195                     if not has_eid(cursor, eid, eids):
       
   196                         bad_related_msg(rtype, 'object', eid, fix)
       
   197                         if fix:
       
   198                             session.system_sql('UPDATE %s SET %s = NULL WHERE eid=%s;'
       
   199                                                % (subjtype, rtype, eid))
       
   200             continue
       
   201         cursor = session.system_sql('SELECT eid_from FROM %s_relation;' % rtype)
       
   202         for row in cursor.fetchall():
       
   203             eid = row[0]
       
   204             if not has_eid(cursor, eid, eids):
       
   205                 bad_related_msg(rtype, 'subject', eid, fix)
       
   206                 if fix:
       
   207                     session.system_sql(
       
   208                         'DELETE FROM %s_relations WHERE eid_from=%s;' % (rtype, eid))
       
   209         cursor = session.system_sql('SELECT eid_to FROM %s_relation;' % rtype)
       
   210         for row in cursor.fetchall():
       
   211             eid = row[0]
       
   212             if not has_eid(cursor, eid, eids):
       
   213                 bad_related_msg(rtype, 'object', eid, fix)
       
   214                 if fix:
       
   215                     session.system_sql('DELETE FROM relations WHERE eid_to=%s;' % eid)
       
   216 
       
   217 
       
   218 def check_metadata(schema, session, eids, fix=1):
       
   219     """check entities has required metadata
       
   220 
       
   221     FIXME: rewrite using RQL queries ?
       
   222     """
       
   223     print 'Checking metadata'
       
   224     cursor = session.system_sql("SELECT DISTINCT type FROM entities;")
       
   225     for etype, in cursor.fetchall():
       
   226         for rel, default in ( ('creation_date', now()),
       
   227                               ('modification_date', now()), ):
       
   228             cursor = session.system_sql("SELECT eid FROM %s "
       
   229                                         "WHERE %s is NULL" % (etype, rel))
       
   230             for eid, in cursor.fetchall():
       
   231                 msg = '  %s with eid %s has no %s'
       
   232                 print >> sys.stderr, msg % (etype, eid, rel),
       
   233                 if fix:
       
   234                     session.system_sql("UPDATE %s SET %s=%(default)s WHERE eid=%s ;"
       
   235                                        % (etype, rel, eid), {'default': default})
       
   236                     print >> sys.stderr, ' [FIXED]'
       
   237                 else:
       
   238                     print >> sys.stderr
       
   239     cursor = session.system_sql('SELECT MIN(eid) FROM euser;')
       
   240     default_user_eid = cursor.fetchone()[0]
       
   241     assert default_user_eid is not None, 'no user defined !'
       
   242     for rel, default in ( ('owned_by', default_user_eid), ):
       
   243         cursor = session.system_sql("SELECT eid, type FROM entities "
       
   244                                     "WHERE NOT EXISTS "
       
   245                                     "(SELECT 1 FROM %s_relation WHERE eid_from=eid);"
       
   246                                     % rel)
       
   247         for eid, etype in cursor.fetchall():
       
   248             msg = '  %s with eid %s has no %s relation'
       
   249             print >> sys.stderr, msg % (etype, eid, rel),
       
   250             if fix:
       
   251                 session.system_sql('INSERT INTO %s_relation VALUES (%s, %s) ;'
       
   252                                    % (rel, eid, default))
       
   253                 print >> sys.stderr, ' [FIXED]'
       
   254             else:
       
   255                 print >> sys.stderr
       
   256 
       
   257 
       
   258 def check(repo, cnx, checks, reindex, fix):
       
   259     """check integrity of application's repository,
       
   260     using given user and password to locally connect to the repository
       
   261     (no running cubicweb server needed)
       
   262     """
       
   263     session = repo._get_session(cnx.sessionid, setpool=True)
       
   264     # yo, launch checks
       
   265     if checks:
       
   266         eids_cache = {}
       
   267         for check in checks:
       
   268             check_func = globals()['check_%s' % check]
       
   269             check_func(repo.schema, session, eids_cache, fix=fix)
       
   270         if fix:
       
   271             cnx.commit()
       
   272         else:
       
   273             print
       
   274         if not fix:
       
   275             print 'WARNING: Diagnostic run, nothing has been corrected'
       
   276     if reindex:
       
   277         cnx.rollback()
       
   278         session.set_pool()
       
   279         reindex_entities(repo.schema, session)
       
   280         cnx.commit()