changeset 0 b97547f5f1fa
child 380 06e7f2932afe
equal deleted inserted replaced
-1:000000000000 0:b97547f5f1fa
     1 """Check integrity of a CubicWeb repository. Note that only the system
     2 database is actually checked.
     4 :organization: Logilab
     5 :copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
     6 :contact: --
     7 """
     8 __docformat__ = "restructuredtext en"
    10 import sys
    12 from mx.DateTime import now
    13 from logilab.common.shellutils import ProgressBar
    15 def has_eid(sqlcursor, eid, eids):
    16     """return true if the eid is a valid eid"""
    17     if eids.has_key(eid):
    18         return eids[eid]
    19     sqlcursor.execute('SELECT type, source FROM entities WHERE eid=%s' % eid)
    20     try:
    21         etype, source = sqlcursor.fetchone()
    22     except:
    23         eids[eid] = False
    24         return False
    25     if source and source != 'system':
    26         # XXX what to do...
    27         eids[eid] = True
    28         return True
    29     sqlcursor.execute('SELECT * FROM %s WHERE eid=%s' % (etype, eid))
    30     result = sqlcursor.fetchall()
    31     if len(result) == 0:
    32         eids[eid] = False
    33         return False
    34     elif len(result) > 1:
    35         msg = '  More than one entity with eid %s exists in source !'
    36         print >> sys.stderr, msg % eid
    37         print >> sys.stderr, '  WARNING : Unable to fix this, do it yourself !'
    38     eids[eid] = True
    39     return True
    41 # XXX move to yams?
    42 def etype_fti_containers(eschema, _done=None):
    43     if _done is None:
    44         _done = set()
    45     _done.add(eschema)
    46     containers = tuple(eschema.fulltext_containers())
    47     if containers:
    48         for rschema, target in containers:
    49             if target == 'object':
    50                 targets = rschema.objects(eschema)
    51             else:
    52                 targets = rschema.subjects(eschema)
    53             for targeteschema in targets:
    54                 if targeteschema in _done:
    55                     continue
    56                 _done.add(targeteschema)
    57                 for container in etype_fti_containers(targeteschema, _done):
    58                     yield container
    59     else:
    60         yield eschema
    62 def reindex_entities(schema, session):
    63     """Reindex all entities in the repository.

           The full-text index table is emptied, then a `FTIndexEntityOp` is
           instantiated for every entity of each type that has indexable
           attributes (or of the types acting as full-text container for them).

           `Entity.check` is replaced by a function always returning True while
           this runs and put back at the end.

           :param schema: the schema whose entity types are inspected
           :param session: the session used to run the SQL and RQL queries
           """
    64     # deactivate modification_date hook since we don't want them
    65     # to be updated due to the reindexation
    66     from cubicweb.server.hooks import (setmtime_before_update_entity,
    67                                        uniquecstrcheck_before_modification)
    68     from cubicweb.server.repository import FTIndexEntityOp
    69     repo = session.repo
           # NOTE(review): the first line of each of the two statements below is
           # not visible in this view; presumably they unregister the two hooks
           # imported above for the 'before_update_entity' event -- confirm
    71                             'before_update_entity', '')
    73                             'before_update_entity', '')
           # collect the entity types to reindex: non final types having at least
           # one indexable attribute, replaced by their full-text containers
    74     etypes = set()
    75     for eschema in schema.entities():
    76         if eschema.is_final():
    77             continue
    78         indexable_attrs = tuple(eschema.indexable_attributes()) # generator
    79         if not indexable_attrs:
    80             continue
    81         for container in etype_fti_containers(eschema):
    82             etypes.add(container)
    83     print 'Reindexing entities of type %s' % \
    84           ', '.join(sorted(str(e) for e in etypes))
           # one progress step per entity type, plus one for clearing the index
    85     pb = ProgressBar(len(etypes) + 1)
    86     # first monkey patch Entity.check to disable validation
           # NOTE(review): there is no try/finally, so Entity.check stays patched
           # if anything below raises
    87     from cubicweb.common.entity import Entity
    88     _check = Entity.check
    89     Entity.check = lambda self, creation=False: True
    90     # clear fti table first
    91     session.system_sql('DELETE FROM %s' % session.repo.system_source.dbhelper.fti_table)
    92     pb.update()
    93     # reindex entities by instantiating a FTIndexEntityOp for each entity
    94     # of the selected types
    95     for eschema in etypes:
    96         for entity in session.execute('Any X WHERE X is %s' % eschema).entities():
    97             FTIndexEntityOp(session, entity=entity)
    98         pb.update()
    99     # restore Entity.check
   100     Entity.check = _check
   103 def check_schema(session):
   104     """Check the serialized schema.

           Run the `rql` query through `session.execute` and print an error for
           every row whose count differs from 1 and whose constraint type is
           listed in `unique_constraints`. Nothing is modified, problems are
           only reported on the standard output.

           :param session: the session used to execute the RQL query
           """
   105     print 'Checking serialized schema'
           # constraint types for which a count other than 1 is reported
   106     unique_constraints = ('SizeConstraint', 'FormatConstraint',
   107                           'VocabularyConstraint', 'RQLConstraint',
   108                           'RQLVocabularyConstraint')
   110            'WHERE X is Econstraint, R constrained_by X, '
   111            'R relation_type RT, R from_entity ET, RT name RN, '
   112            'ET name EN, X cstrtype ECT, ECT name ECTN')
           # NOTE(review): the line opening the `rql` assignment above is not
           # visible in this view; each row is unpacked below as (count,
           # relation type name, entity type name, constraint type name)
   113     for count, rn, en, cstrname in session.execute(rql):
   114         if count == 1:
   115             continue
   116         if cstrname in unique_constraints:
   117             print "ERROR: got %s %r constraints on relation %s.%s" % (
   118                 count, cstrname, en, rn)
   122 def check_text_index(schema, session, eids, fix=1):
   123     """check all entities registered in the text index"""
   124     print 'Checking text index'
   125     cursor = session.system_sql('SELECT uid FROM appears;')
   126     for row in cursor.fetchall():
   127         eid = row[0]
   128         if not has_eid(cursor, eid, eids):
   129             msg = '  Entity with eid %s exists in the text index but in no source'
   130             print >> sys.stderr, msg % eid,
   131             if fix:
   132                 session.system_sql('DELETE FROM appears WHERE uid=%s;' % eid)
   133                 print >> sys.stderr, ' [FIXED]'
   134             else:
   135                 print >> sys.stderr
   138 def check_entities(schema, session, eids, fix=1):
   139     """check all entities registered in the repo system table"""
   140     print 'Checking entities system table'
   141     cursor = session.system_sql('SELECT eid FROM entities;')
   142     for row in cursor.fetchall():
   143         eid = row[0]
   144         if not has_eid(cursor, eid, eids):
   145             msg = '  Entity with eid %s exists in the system table but in no source'
   146             print >> sys.stderr, msg % eid,
   147             if fix:
   148                 session.system_sql('DELETE FROM entities WHERE eid=%s;' % eid)
   149                 print >> sys.stderr, ' [FIXED]'
   150             else:
   151                 print >> sys.stderr
   152     print 'Checking entities tables'
   153     for eschema in schema.entities():
   154         if eschema.is_final():
   155             continue
   156         cursor = session.system_sql('SELECT eid FROM %s;' % eschema.type)
   157         for row in cursor.fetchall():
   158             eid = row[0]
   159             # eids is full since we have fetched everyting from the entities table,
   160             # no need to call has_eid
   161             if not eid in eids or not eids[eid]:
   162                 msg = '  Entity with eid %s exists in the %s table but not in the system table'
   163                 print >> sys.stderr, msg % (eid, eschema.type),
   164                 if fix:
   165                     session.system_sql('DELETE FROM %s WHERE eid=%s;' % (eschema.type, eid))
   166                     print >> sys.stderr, ' [FIXED]'
   167                 else:
   168                     print >> sys.stderr
   171 def bad_related_msg(rtype, target, eid, fix):
   172     msg = '  A relation %s with %s eid %s exists but no such entity in sources'
   173     print >> sys.stderr, msg % (rtype, target, eid),
   174     if fix:
   175         print >> sys.stderr, ' [FIXED]'
   176     else:
   177         print >> sys.stderr
   180 def check_relations(schema, session, eids, fix=1):
   181     """check all relations registered in the repo system table"""
   182     print 'Checking relations'
   183     for rschema in schema.relations():
   184         if rschema.is_final():
   185             continue
   186         rtype = rschema.type
   187         if rtype == 'identity':
   188             continue
   189         if rschema.inlined:
   190             for subjtype in rschema.subjects():
   191                 cursor = session.system_sql('SELECT %s FROM %s WHERE %s IS NOT NULL;'
   192                                             % (rtype, subjtype, rtype))
   193                 for row in cursor.fetchall():
   194                     eid = row[0]
   195                     if not has_eid(cursor, eid, eids):
   196                         bad_related_msg(rtype, 'object', eid, fix)
   197                         if fix:
   198                             session.system_sql('UPDATE %s SET %s = NULL WHERE eid=%s;'
   199                                                % (subjtype, rtype, eid))
   200             continue
   201         cursor = session.system_sql('SELECT eid_from FROM %s_relation;' % rtype)
   202         for row in cursor.fetchall():
   203             eid = row[0]
   204             if not has_eid(cursor, eid, eids):
   205                 bad_related_msg(rtype, 'subject', eid, fix)
   206                 if fix:
   207                     session.system_sql(
   208                         'DELETE FROM %s_relations WHERE eid_from=%s;' % (rtype, eid))
   209         cursor = session.system_sql('SELECT eid_to FROM %s_relation;' % rtype)
   210         for row in cursor.fetchall():
   211             eid = row[0]
   212             if not has_eid(cursor, eid, eids):
   213                 bad_related_msg(rtype, 'object', eid, fix)
   214                 if fix:
   215                     session.system_sql('DELETE FROM relations WHERE eid_to=%s;' % eid)
   218 def check_metadata(schema, session, eids, fix=1):
   219     """check entities has required metadata
   221     FIXME: rewrite using RQL queries ?
   222     """
   223     print 'Checking metadata'
   224     cursor = session.system_sql("SELECT DISTINCT type FROM entities;")
   225     for etype, in cursor.fetchall():
   226         for rel, default in ( ('creation_date', now()),
   227                               ('modification_date', now()), ):
   228             cursor = session.system_sql("SELECT eid FROM %s "
   229                                         "WHERE %s is NULL" % (etype, rel))
   230             for eid, in cursor.fetchall():
   231                 msg = '  %s with eid %s has no %s'
   232                 print >> sys.stderr, msg % (etype, eid, rel),
   233                 if fix:
   234                     session.system_sql("UPDATE %s SET %s=%(default)s WHERE eid=%s ;"
   235                                        % (etype, rel, eid), {'default': default})
   236                     print >> sys.stderr, ' [FIXED]'
   237                 else:
   238                     print >> sys.stderr
   239     cursor = session.system_sql('SELECT MIN(eid) FROM euser;')
   240     default_user_eid = cursor.fetchone()[0]
   241     assert default_user_eid is not None, 'no user defined !'
   242     for rel, default in ( ('owned_by', default_user_eid), ):
   243         cursor = session.system_sql("SELECT eid, type FROM entities "
   244                                     "WHERE NOT EXISTS "
   245                                     "(SELECT 1 FROM %s_relation WHERE eid_from=eid);"
   246                                     % rel)
   247         for eid, etype in cursor.fetchall():
   248             msg = '  %s with eid %s has no %s relation'
   249             print >> sys.stderr, msg % (etype, eid, rel),
   250             if fix:
   251                 session.system_sql('INSERT INTO %s_relation VALUES (%s, %s) ;'
   252                                    % (rel, eid, default))
   253                 print >> sys.stderr, ' [FIXED]'
   254             else:
   255                 print >> sys.stderr
   258 def check(repo, cnx, checks, reindex, fix):
   259     """check integrity of application's repository,
   260     using given user and password to locally connect to the repository
   261     (no running cubicweb server needed)
   262     """
   263     session = repo._get_session(cnx.sessionid, setpool=True)
   264     # yo, launch checks
   265     if checks:
   266         eids_cache = {}
   267         for check in checks:
   268             check_func = globals()['check_%s' % check]
   269             check_func(repo.schema, session, eids_cache, fix=fix)
   270         if fix:
   271             cnx.commit()
   272         else:
   273             print
   274         if not fix:
   275             print 'WARNING: Diagnostic run, nothing has been corrected'
   276     if reindex:
   277         cnx.rollback()
   278         session.set_pool()
   279         reindex_entities(repo.schema, session)
   280         cnx.commit()