--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/checkintegrity.py Wed Nov 05 15:52:50 2008 +0100
@@ -0,0 +1,280 @@
+"""Check integrity of a CubicWeb repository. Hum actually only the system database
+is checked.
+
+:organization: Logilab
+:copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
+"""
+__docformat__ = "restructuredtext en"
+
+import sys
+
+from mx.DateTime import now
+from logilab.common.shellutils import ProgressBar
+
+def has_eid(sqlcursor, eid, eids):
+    """Return true if `eid` is a valid eid.
+
+    The eid is first looked up in the `entities` table. Unless the entity is
+    recorded as coming from a source other than 'system', it is then looked
+    up in the table named after its entity type.
+
+    :param sqlcursor: cursor used to execute the lookup queries
+    :param eid: the entity identifier to check
+    :param eids:
+      dictionary mapping already checked eids to the result of the check; it
+      is used as a cache and updated in place
+    :return: True if the eid is considered valid, False otherwise
+    """
+    if eid in eids:
+        return eids[eid]
+    sqlcursor.execute('SELECT type, source FROM entities WHERE eid=%s' % eid)
+    row = sqlcursor.fetchone()
+    if row is None:
+        # nothing registered for this eid in the entities table; testing the
+        # row explicitly (instead of a bare except around the unpacking) lets
+        # genuine database errors propagate
+        eids[eid] = False
+        return False
+    etype, source = row
+    if source and source != 'system':
+        # XXX what to do...
+        # entity from another source: accepted without further check
+        eids[eid] = True
+        return True
+    sqlcursor.execute('SELECT * FROM %s WHERE eid=%s' % (etype, eid))
+    result = sqlcursor.fetchall()
+    if not result:
+        eids[eid] = False
+        return False
+    if len(result) > 1:
+        # duplicates are only reported, the eid is still considered valid
+        msg = ' More than one entity with eid %s exists in source !'
+        print >> sys.stderr, msg % eid
+        print >> sys.stderr, ' WARNING : Unable to fix this, do it yourself !'
+    eids[eid] = True
+    return True
+
+# XXX move to yams?
+def etype_fti_containers(eschema, _done=None):
+    """Yield the entity schemas acting as full-text containers of `eschema`.
+
+    When `eschema.fulltext_containers()` is empty, `eschema` itself is
+    yielded. Otherwise each (relation schema, role) pair it returns is
+    followed to the related entity schemas, whose own containers are yielded
+    recursively.
+
+    :param eschema: the entity schema to start from
+    :param _done:
+      set of entity schemas already visited, used by the recursion to avoid
+      visiting a schema twice; callers should leave it to None
+    """
+    if _done is None:
+        _done = set()
+    _done.add(eschema)
+    containers = tuple(eschema.fulltext_containers())
+    if not containers:
+        # no container relation: the type is its own container
+        yield eschema
+        return
+    for rschema, role in containers:
+        if role == 'object':
+            candidates = rschema.objects(eschema)
+        else:
+            candidates = rschema.subjects(eschema)
+        for candidate in candidates:
+            if candidate not in _done:
+                _done.add(candidate)
+                for container in etype_fti_containers(candidate, _done):
+                    yield container
+
+def reindex_entities(schema, session):
+    """Reindex all entities in the repository.
+
+    The full-text index table is emptied, then a `FTIndexEntityOp` operation
+    is created for every entity of each type found to be a full-text
+    container of a type with indexable attributes.
+
+    :param schema: the repository schema, used to find the types to reindex
+    :param session: the session used to run the SQL and RQL queries
+    """
+    # deactivate modification_date hook since we don't want them
+    # to be updated due to the reindexation
+    # NOTE(review): the hooks unregistered below are not registered again by
+    # this function
+    from cubicweb.server.hooks import (setmtime_before_update_entity,
+                                       uniquecstrcheck_before_modification)
+    from cubicweb.server.repository import FTIndexEntityOp
+    repo = session.repo
+    repo.hm.unregister_hook(setmtime_before_update_entity,
+                            'before_update_entity', '')
+    repo.hm.unregister_hook(uniquecstrcheck_before_modification,
+                            'before_update_entity', '')
+    etypes = set()
+    for eschema in schema.entities():
+        if eschema.is_final():
+            continue
+        indexable_attrs = tuple(eschema.indexable_attributes()) # generator
+        if not indexable_attrs:
+            continue
+        etypes.update(etype_fti_containers(eschema))
+    print 'Reindexing entities of type %s' % \
+          ', '.join(sorted(str(e) for e in etypes))
+    # one step for clearing the index table, then one per entity type
+    pb = ProgressBar(len(etypes) + 1)
+    # first monkey patch Entity.check to disable validation
+    from cubicweb.common.entity import Entity
+    _check = Entity.check
+    Entity.check = lambda self, creation=False: True
+    try:
+        # clear fti table first
+        session.system_sql('DELETE FROM %s'
+                           % repo.system_source.dbhelper.fti_table)
+        pb.update()
+        # reindex entities by creating an indexation operation for each
+        # entity of the selected types
+        for eschema in etypes:
+            rset = session.execute('Any X WHERE X is %s' % eschema)
+            for entity in rset.entities():
+                FTIndexEntityOp(session, entity=entity)
+            pb.update()
+    finally:
+        # restore Entity.check, even if the reindexation failed
+        Entity.check = _check
+
+
+def check_schema(session):
+    """Check the serialized schema.
+
+    Constraints are counted per relation name, subject entity type name and
+    constraint type name. An error is printed for each group holding more
+    than one constraint of a type listed in `unique_constraints`.
+
+    :param session: the session used to execute the RQL query
+    """
+    print 'Checking serialized schema'
+    unique_constraints = ('SizeConstraint', 'FormatConstraint',
+                          'VocabularyConstraint', 'RQLConstraint',
+                          'RQLVocabularyConstraint')
+    # NOTE(review): the entity type is spelled 'Econstraint' in the query
+    # below -- confirm this matches the name used by the schema
+    rql = ('Any COUNT(X),RN,EN,ECTN GROUPBY RN,EN,ECTN ORDERBY 1 '
+           'WHERE X is Econstraint, R constrained_by X, '
+           'R relation_type RT, R from_entity ET, RT name RN, '
+           'ET name EN, X cstrtype ECT, ECT name ECTN')
+    for count, rn, en, cstrname in session.execute(rql):
+        if count != 1 and cstrname in unique_constraints:
+            print "ERROR: got %s %r constraints on relation %s.%s" % (
+                count, cstrname, en, rn)
+
+
+
+def check_text_index(schema, session, eids, fix=1):
+    """Check all entities registered in the text index.
+
+    Each uid found in the `appears` table is checked with `has_eid`; uids
+    which are not valid eids are reported on stderr and, when `fix` is true,
+    deleted from the table.
+
+    :param schema: not used by this check
+    :param session: the session used to execute SQL queries
+    :param eids: cache of already checked eids, as expected by `has_eid`
+    :param fix: when true, delete the faulty rows instead of only reporting
+    """
+    print 'Checking text index'
+    cursor = session.system_sql('SELECT uid FROM appears;')
+    for (eid,) in cursor.fetchall():
+        if has_eid(cursor, eid, eids):
+            continue
+        msg = ' Entity with eid %s exists in the text index but in no source'
+        print >> sys.stderr, msg % eid,
+        if not fix:
+            # only terminate the line of the report
+            print >> sys.stderr
+            continue
+        session.system_sql('DELETE FROM appears WHERE uid=%s;' % eid)
+        print >> sys.stderr, ' [FIXED]'
+
+
+def check_entities(schema, session, eids, fix=1):
+    """Check all entities registered in the repo system table.
+
+    First every eid of the `entities` table is checked with `has_eid`, then
+    every eid found in the table of each non final entity type is checked
+    against the results of the first pass. Faulty rows are reported on
+    stderr and, when `fix` is true, deleted.
+
+    :param schema: the schema listing the entity types to check
+    :param session: the session used to execute SQL queries
+    :param eids: cache of already checked eids, filled by this function
+    :param fix: when true, delete the faulty rows instead of only reporting
+    """
+    print 'Checking entities system table'
+    cursor = session.system_sql('SELECT eid FROM entities;')
+    for (eid,) in cursor.fetchall():
+        if has_eid(cursor, eid, eids):
+            continue
+        msg = ' Entity with eid %s exists in the system table but in no source'
+        print >> sys.stderr, msg % eid,
+        if not fix:
+            print >> sys.stderr
+            continue
+        session.system_sql('DELETE FROM entities WHERE eid=%s;' % eid)
+        print >> sys.stderr, ' [FIXED]'
+    print 'Checking entities tables'
+    for eschema in schema.entities():
+        if eschema.is_final():
+            continue
+        cursor = session.system_sql('SELECT eid FROM %s;' % eschema.type)
+        for (eid,) in cursor.fetchall():
+            # eids is full since we have fetched everything from the entities
+            # table, no need to call has_eid
+            if eids.get(eid):
+                continue
+            msg = ' Entity with eid %s exists in the %s table but not in the system table'
+            print >> sys.stderr, msg % (eid, eschema.type),
+            if not fix:
+                print >> sys.stderr
+                continue
+            session.system_sql('DELETE FROM %s WHERE eid=%s;' % (eschema.type, eid))
+            print >> sys.stderr, ' [FIXED]'
+
+
+def bad_related_msg(rtype, target, eid, fix):
+    """Report on stderr a relation whose `target` end is not a valid entity.
+
+    :param rtype: the relation type name
+    :param target: the faulty end of the relation, as displayed in the message
+    :param eid: the eid which doesn't match any entity
+    :param fix: when true, the report is flagged as fixed
+    """
+    msg = ' A relation %s with %s eid %s exists but no such entity in sources'
+    report = msg % (rtype, target, eid)
+    # trailing comma: the line is terminated by one of the prints below
+    print >> sys.stderr, report,
+    if not fix:
+        print >> sys.stderr
+        return
+    print >> sys.stderr, ' [FIXED]'
+
+
+def check_relations(schema, session, eids, fix=1):
+    """Check all relations registered in the repo system table.
+
+    For each non final relation type except 'identity', the eids stored as
+    subject or object of the relation are checked with `has_eid`. Relations
+    referencing an eid which is not valid are reported on stderr and, when
+    `fix` is true, removed.
+
+    :param schema: the schema listing the relation types to check
+    :param session: the session used to execute SQL queries
+    :param eids: cache of already checked eids, as expected by `has_eid`
+    :param fix: when true, remove faulty relations instead of only reporting
+    """
+    print 'Checking relations'
+    for rschema in schema.relations():
+        if rschema.is_final():
+            continue
+        rtype = rschema.type
+        if rtype == 'identity':
+            continue
+        if rschema.inlined:
+            # the relation is stored as a column of the subject's table
+            for subjtype in rschema.subjects():
+                cursor = session.system_sql(
+                    'SELECT %s FROM %s WHERE %s IS NOT NULL;'
+                    % (rtype, subjtype, rtype))
+                for row in cursor.fetchall():
+                    eid = row[0]
+                    if not has_eid(cursor, eid, eids):
+                        bad_related_msg(rtype, 'object', eid, fix)
+                        if fix:
+                            # `eid` is the value read from the relation
+                            # column, not the eid of the row holding it, so
+                            # rows must be selected on that column
+                            session.system_sql(
+                                'UPDATE %s SET %s = NULL WHERE %s=%s;'
+                                % (subjtype, rtype, rtype, eid))
+            continue
+        # other relations are stored in their own <rtype>_relation table:
+        # check both ends, subjects first
+        for column, role in (('eid_from', 'subject'), ('eid_to', 'object')):
+            cursor = session.system_sql('SELECT %s FROM %s_relation;'
+                                        % (column, rtype))
+            for row in cursor.fetchall():
+                eid = row[0]
+                if not has_eid(cursor, eid, eids):
+                    bad_related_msg(rtype, role, eid, fix)
+                    if fix:
+                        # delete from the table the eid has been read from
+                        session.system_sql(
+                            'DELETE FROM %s_relation WHERE %s=%s;'
+                            % (rtype, column, eid))
+
+
+def check_metadata(schema, session, eids, fix=1):
+    """Check entities have the required metadata.
+
+    Entities without creation_date or modification_date and entities which
+    are not the subject of an owned_by relation are reported on stderr. When
+    `fix` is true, missing dates are set to the current date and missing
+    owned_by relations are created, using the lowest eid of the `euser`
+    table as owner.
+
+    FIXME: rewrite using RQL queries ?
+
+    :param schema: not used by this check
+    :param session: the session used to execute SQL queries
+    :param eids: not used by this check
+    :param fix: when true, add missing metadata instead of only reporting
+    :raise AssertionError: if the `euser` table is empty
+    """
+    print 'Checking metadata'
+    cursor = session.system_sql("SELECT DISTINCT type FROM entities;")
+    for etype, in cursor.fetchall():
+        for rel, default in ( ('creation_date', now()),
+                              ('modification_date', now()), ):
+            cursor = session.system_sql("SELECT eid FROM %s "
+                                        "WHERE %s is NULL" % (etype, rel))
+            for eid, in cursor.fetchall():
+                msg = ' %s with eid %s has no %s'
+                print >> sys.stderr, msg % (etype, eid, rel),
+                if fix:
+                    # %%(default)s is escaped so that it survives the string
+                    # formatting and is left as a query argument placeholder
+                    session.system_sql("UPDATE %s SET %s=%%(default)s WHERE eid=%s ;"
+                                       % (etype, rel, eid), {'default': default})
+                    print >> sys.stderr, ' [FIXED]'
+                else:
+                    print >> sys.stderr
+    cursor = session.system_sql('SELECT MIN(eid) FROM euser;')
+    default_user_eid = cursor.fetchone()[0]
+    if default_user_eid is None:
+        # explicit raise rather than assert: still checked when running
+        # python with -O
+        raise AssertionError('no user defined !')
+    for rel, default in ( ('owned_by', default_user_eid), ):
+        cursor = session.system_sql("SELECT eid, type FROM entities "
+                                    "WHERE NOT EXISTS "
+                                    "(SELECT 1 FROM %s_relation WHERE eid_from=eid);"
+                                    % rel)
+        for eid, etype in cursor.fetchall():
+            msg = ' %s with eid %s has no %s relation'
+            print >> sys.stderr, msg % (etype, eid, rel),
+            if fix:
+                session.system_sql('INSERT INTO %s_relation VALUES (%s, %s) ;'
+                                   % (rel, eid, default))
+                print >> sys.stderr, ' [FIXED]'
+            else:
+                print >> sys.stderr
+
+
+def check(repo, cnx, checks, reindex, fix):
+    """Check integrity of application's repository, using the given
+    connection to the repository.
+
+    :param repo: the repository to check
+    :param cnx:
+      a connection to the repository, used to get the session and to commit
+      or rollback changes
+    :param checks:
+      names of the checks to run; a name X is run by the `check_X` function
+      of this module
+    :param reindex: when true, reindex all entities once checks are done
+    :param fix: when true, fix detected problems and commit the changes
+    :raise KeyError: if a name in `checks` matches no `check_X` function
+    """
+    session = repo._get_session(cnx.sessionid, setpool=True)
+    # yo, launch checks
+    if checks:
+        eids_cache = {}
+        for name in checks:
+            check_func = globals()['check_%s' % name]
+            if name == 'schema':
+                # check_schema only accepts the session: calling it with the
+                # arguments given to the other checks raises a TypeError
+                check_func(session)
+            else:
+                check_func(repo.schema, session, eids_cache, fix=fix)
+        if fix:
+            cnx.commit()
+        else:
+            print
+            print 'WARNING: Diagnostic run, nothing has been corrected'
+    if reindex:
+        # drop anything left uncommitted before reindexing
+        cnx.rollback()
+        session.set_pool()
+        reindex_entities(repo.schema, session)
+        cnx.commit()