diff -r 058bb3dc685f -r 0b59724cb3f2 cubicweb/server/checkintegrity.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cubicweb/server/checkintegrity.py Sat Jan 16 13:48:51 2016 +0100 @@ -0,0 +1,410 @@ +# copyright 2003-2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved. +# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr +# +# This file is part of CubicWeb. +# +# CubicWeb is free software: you can redistribute it and/or modify it under the +# terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 2.1 of the License, or (at your option) +# any later version. +# +# CubicWeb is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License along +# with CubicWeb. If not, see . +"""Integrity checking tool for instances: + +* integrity of a CubicWeb repository. Hum actually only the system database is + checked. +""" +from __future__ import print_function + +__docformat__ = "restructuredtext en" + +import sys +from datetime import datetime + +from logilab.common.shellutils import ProgressBar + +from cubicweb.schema import PURE_VIRTUAL_RTYPES, VIRTUAL_RTYPES, UNIQUE_CONSTRAINTS +from cubicweb.server.sqlutils import SQL_PREFIX + +def notify_fixed(fix): + if fix: + sys.stderr.write(' [FIXED]') + sys.stderr.write('\n') + +def has_eid(cnx, sqlcursor, eid, eids): + """return true if the eid is a valid eid""" + if eid in eids: + return eids[eid] + sqlcursor.execute('SELECT type FROM entities WHERE eid=%s' % eid) + try: + etype = sqlcursor.fetchone()[0] + except Exception: + eids[eid] = False + return False + if etype not in cnx.vreg.schema: + eids[eid] = False + return False + sqlcursor.execute('SELECT * FROM %s%s WHERE %seid=%s' % (SQL_PREFIX, etype, + SQL_PREFIX, eid)) + result = sqlcursor.fetchall() + if len(result) == 0: + eids[eid] = False + return False + elif len(result) > 1: + msg = (' More than one entity with eid %s exists in source!\n' + ' WARNING : Unable to fix this, do it yourself!\n') + sys.stderr.write(msg % eid) + eids[eid] = True + return True + +# XXX move to yams? +def etype_fti_containers(eschema, _done=None): + if _done is None: + _done = set() + _done.add(eschema) + containers = tuple(eschema.fulltext_containers()) + if containers: + for rschema, target in containers: + if target == 'object': + targets = rschema.objects(eschema) + else: + targets = rschema.subjects(eschema) + for targeteschema in targets: + if targeteschema in _done: + continue + _done.add(targeteschema) + for container in etype_fti_containers(targeteschema, _done): + yield container + else: + yield eschema + +def reindex_entities(schema, cnx, withpb=True, etypes=None): + """reindex all entities in the repository""" + # deactivate modification_date hook since we don't want them + # to be updated due to the reindexation + repo = cnx.repo + dbhelper = repo.system_source.dbhelper + cursor = cnx.cnxset.cu + if not dbhelper.has_fti_table(cursor): + print('no text index table') + dbhelper.init_fti(cursor) + repo.system_source.do_fti = True # ensure full-text indexation is activated + if etypes is None: + print('Reindexing entities') + etypes = set() + for eschema in schema.entities(): + if eschema.final: + continue + indexable_attrs = tuple(eschema.indexable_attributes()) # generator + if not indexable_attrs: + continue + for container in etype_fti_containers(eschema): + etypes.add(container) + # clear fti table first + cnx.system_sql('DELETE FROM %s' % dbhelper.fti_table) + else: + print('Reindexing entities of type %s' % \ + ', '.join(sorted(str(e) for e in etypes))) + # clear fti table first. Use subquery for sql compatibility + cnx.system_sql("DELETE FROM %s WHERE EXISTS(SELECT 1 FROM ENTITIES " + "WHERE eid=%s AND type IN (%s))" % ( + dbhelper.fti_table, dbhelper.fti_uid_attr, + ','.join("'%s'" % etype for etype in etypes))) + if withpb: + pb = ProgressBar(len(etypes) + 1) + pb.update() + # reindex entities by generating rql queries which set all indexable + # attribute to their current value + source = repo.system_source + for eschema in etypes: + etype_class = cnx.vreg['etypes'].etype_class(str(eschema)) + for rset in etype_class.cw_fti_index_rql_limit(cnx): + source.fti_index_entities(cnx, rset.entities()) + # clear entity cache to avoid high memory consumption on big tables + cnx.drop_entity_cache() + if withpb: + pb.update() + if withpb: + pb.finish() + + +def check_schema(schema, cnx, eids, fix=1): + """check serialized schema""" + print('Checking serialized schema') + rql = ('Any COUNT(X),RN,SN,ON,CTN GROUPBY RN,SN,ON,CTN ORDERBY 1 ' + 'WHERE X is CWConstraint, R constrained_by X, ' + 'R relation_type RT, RT name RN, R from_entity ST, ST name SN, ' + 'R to_entity OT, OT name ON, X cstrtype CT, CT name CTN') + for count, rn, sn, on, cstrname in cnx.execute(rql): + if count == 1: + continue + if cstrname in UNIQUE_CONSTRAINTS: + print("ERROR: got %s %r constraints on relation %s.%s.%s" % ( + count, cstrname, sn, rn, on)) + if fix: + print('dunno how to fix, do it yourself') + + + +def check_text_index(schema, cnx, eids, fix=1): + """check all entities registered in the text index""" + print('Checking text index') + msg = ' Entity with eid %s exists in the text index but in no source (autofix will remove from text index)' + cursor = cnx.system_sql('SELECT uid FROM appears;') + for row in cursor.fetchall(): + eid = row[0] + if not has_eid(cnx, cursor, eid, eids): + sys.stderr.write(msg % eid) + if fix: + cnx.system_sql('DELETE FROM appears WHERE uid=%s;' % eid) + notify_fixed(fix) + + +def check_entities(schema, cnx, eids, fix=1): + """check all entities registered in the repo system table""" + print('Checking entities system table') + # system table but no source + msg = ' Entity %s with eid %s exists in the system table but in no source (autofix will delete the entity)' + cursor = cnx.system_sql('SELECT eid,type FROM entities;') + for row in cursor.fetchall(): + eid, etype = row + if not has_eid(cnx, cursor, eid, eids): + sys.stderr.write(msg % (etype, eid)) + if fix: + cnx.system_sql('DELETE FROM entities WHERE eid=%s;' % eid) + notify_fixed(fix) + # source in entities, but no relation cw_source + # XXX this (get_versions) requires a second connection to the db when we already have one open + applcwversion = cnx.repo.get_versions().get('cubicweb') + if applcwversion >= (3, 13, 1): # entities.asource appeared in 3.13.1 + cursor = cnx.system_sql('SELECT e.eid FROM entities as e, cw_CWSource as s ' + 'WHERE s.cw_name=e.asource AND ' + 'NOT EXISTS(SELECT 1 FROM cw_source_relation as cs ' + ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid) ' + 'ORDER BY e.eid') + msg = (' Entity with eid %s refers to source in entities table, ' + 'but is missing relation cw_source (autofix will create the relation)\n') + for row in cursor.fetchall(): + sys.stderr.write(msg % row[0]) + if fix: + cnx.system_sql('INSERT INTO cw_source_relation (eid_from, eid_to) ' + 'SELECT e.eid, s.cw_eid FROM entities as e, cw_CWSource as s ' + 'WHERE s.cw_name=e.asource AND NOT EXISTS(SELECT 1 FROM cw_source_relation as cs ' + ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid)') + notify_fixed(True) + # inconsistencies for 'is' + msg = ' %s #%s is missing relation "is" (autofix will create the relation)\n' + cursor = cnx.system_sql('SELECT e.type, e.eid FROM entities as e, cw_CWEType as s ' + 'WHERE s.cw_name=e.type AND NOT EXISTS(SELECT 1 FROM is_relation as cs ' + ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid) ' + 'ORDER BY e.eid') + for row in cursor.fetchall(): + sys.stderr.write(msg % tuple(row)) + if fix: + cnx.system_sql('INSERT INTO is_relation (eid_from, eid_to) ' + 'SELECT e.eid, s.cw_eid FROM entities as e, cw_CWEType as s ' + 'WHERE s.cw_name=e.type AND NOT EXISTS(SELECT 1 FROM is_relation as cs ' + ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid)') + notify_fixed(True) + # inconsistencies for 'is_instance_of' + msg = ' %s #%s is missing relation "is_instance_of" (autofix will create the relation)\n' + cursor = cnx.system_sql('SELECT e.type, e.eid FROM entities as e, cw_CWEType as s ' + 'WHERE s.cw_name=e.type AND NOT EXISTS(SELECT 1 FROM is_instance_of_relation as cs ' + ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid) ' + 'ORDER BY e.eid') + for row in cursor.fetchall(): + sys.stderr.write(msg % tuple(row)) + if fix: + cnx.system_sql('INSERT INTO is_instance_of_relation (eid_from, eid_to) ' + 'SELECT e.eid, s.cw_eid FROM entities as e, cw_CWEType as s ' + 'WHERE s.cw_name=e.type AND NOT EXISTS(SELECT 1 FROM is_instance_of_relation as cs ' + ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid)') + notify_fixed(True) + print('Checking entities tables') + msg = ' Entity with eid %s exists in the %s table but not in the system table (autofix will delete the entity)' + for eschema in schema.entities(): + if eschema.final: + continue + table = SQL_PREFIX + eschema.type + column = SQL_PREFIX + 'eid' + cursor = cnx.system_sql('SELECT %s FROM %s;' % (column, table)) + for row in cursor.fetchall(): + eid = row[0] + # eids is full since we have fetched everything from the entities table, + # no need to call has_eid + if not eid in eids or not eids[eid]: + sys.stderr.write(msg % (eid, eschema.type)) + if fix: + cnx.system_sql('DELETE FROM %s WHERE %s=%s;' % (table, column, eid)) + notify_fixed(fix) + + +def bad_related_msg(rtype, target, eid, fix): + msg = ' A relation %s with %s eid %s exists but no such entity in sources' + sys.stderr.write(msg % (rtype, target, eid)) + notify_fixed(fix) + +def bad_inlined_msg(rtype, parent_eid, eid, fix): + msg = (' An inlined relation %s from %s to %s exists but the latter ' + 'entity does not exist') + sys.stderr.write(msg % (rtype, parent_eid, eid)) + notify_fixed(fix) + + +def check_relations(schema, cnx, eids, fix=1): + """check that eids referenced by relations are registered in the repo system + table + """ + print('Checking relations') + for rschema in schema.relations(): + if rschema.final or rschema.type in PURE_VIRTUAL_RTYPES: + continue + if rschema.inlined: + for subjtype in rschema.subjects(): + table = SQL_PREFIX + str(subjtype) + column = SQL_PREFIX + str(rschema) + sql = 'SELECT cw_eid,%s FROM %s WHERE %s IS NOT NULL;' % ( + column, table, column) + cursor = cnx.system_sql(sql) + for row in cursor.fetchall(): + parent_eid, eid = row + if not has_eid(cnx, cursor, eid, eids): + bad_inlined_msg(rschema, parent_eid, eid, fix) + if fix: + sql = 'UPDATE %s SET %s=NULL WHERE %s=%s;' % ( + table, column, column, eid) + cnx.system_sql(sql) + continue + try: + cursor = cnx.system_sql('SELECT eid_from FROM %s_relation;' % rschema) + except Exception as ex: + # usually because table doesn't exist + print('ERROR', ex) + continue + for row in cursor.fetchall(): + eid = row[0] + if not has_eid(cnx, cursor, eid, eids): + bad_related_msg(rschema, 'subject', eid, fix) + if fix: + sql = 'DELETE FROM %s_relation WHERE eid_from=%s;' % ( + rschema, eid) + cnx.system_sql(sql) + cursor = cnx.system_sql('SELECT eid_to FROM %s_relation;' % rschema) + for row in cursor.fetchall(): + eid = row[0] + if not has_eid(cnx, cursor, eid, eids): + bad_related_msg(rschema, 'object', eid, fix) + if fix: + sql = 'DELETE FROM %s_relation WHERE eid_to=%s;' % ( + rschema, eid) + cnx.system_sql(sql) + + +def check_mandatory_relations(schema, cnx, eids, fix=1): + """check entities missing some mandatory relation""" + print('Checking mandatory relations') + msg = '%s #%s is missing mandatory %s relation %s (autofix will delete the entity)' + for rschema in schema.relations(): + if rschema.final or rschema in PURE_VIRTUAL_RTYPES or rschema in ('is', 'is_instance_of'): + continue + smandatory = set() + omandatory = set() + for rdef in rschema.rdefs.values(): + if rdef.cardinality[0] in '1+': + smandatory.add(rdef.subject) + if rdef.cardinality[1] in '1+': + omandatory.add(rdef.object) + for role, etypes in (('subject', smandatory), ('object', omandatory)): + for etype in etypes: + if role == 'subject': + rql = 'Any X WHERE NOT X %s Y, X is %s' % (rschema, etype) + else: + rql = 'Any X WHERE NOT Y %s X, X is %s' % (rschema, etype) + for entity in cnx.execute(rql).entities(): + sys.stderr.write(msg % (entity.cw_etype, entity.eid, role, rschema)) + if fix: + #if entity.cw_describe()['source']['uri'] == 'system': XXX + entity.cw_delete() # XXX this is BRUTAL! + notify_fixed(fix) + + +def check_mandatory_attributes(schema, cnx, eids, fix=1): + """check for entities stored in the system source missing some mandatory + attribute + """ + print('Checking mandatory attributes') + msg = '%s #%s is missing mandatory attribute %s (autofix will delete the entity)' + for rschema in schema.relations(): + if not rschema.final or rschema in VIRTUAL_RTYPES: + continue + for rdef in rschema.rdefs.values(): + if rdef.cardinality[0] in '1+': + rql = 'Any X WHERE X %s NULL, X is %s, X cw_source S, S name "system"' % ( + rschema, rdef.subject) + for entity in cnx.execute(rql).entities(): + sys.stderr.write(msg % (entity.cw_etype, entity.eid, rschema)) + if fix: + entity.cw_delete() + notify_fixed(fix) + + +def check_metadata(schema, cnx, eids, fix=1): + """check entities has required metadata + + FIXME: rewrite using RQL queries ? + """ + print('Checking metadata') + cursor = cnx.system_sql("SELECT DISTINCT type FROM entities;") + eidcolumn = SQL_PREFIX + 'eid' + msg = ' %s with eid %s has no %s (autofix will set it to now)' + for etype, in cursor.fetchall(): + if etype not in cnx.vreg.schema: + sys.stderr.write('entities table references unknown type %s\n' % + etype) + if fix: + cnx.system_sql("DELETE FROM entities WHERE type = %(type)s", + {'type': etype}) + continue + table = SQL_PREFIX + etype + for rel, default in ( ('creation_date', datetime.utcnow()), + ('modification_date', datetime.utcnow()), ): + column = SQL_PREFIX + rel + cursor = cnx.system_sql("SELECT %s FROM %s WHERE %s is NULL" + % (eidcolumn, table, column)) + for eid, in cursor.fetchall(): + sys.stderr.write(msg % (etype, eid, rel)) + if fix: + cnx.system_sql("UPDATE %s SET %s=%%(v)s WHERE %s=%s ;" + % (table, column, eidcolumn, eid), + {'v': default}) + notify_fixed(fix) + + +def check(repo, cnx, checks, reindex, fix, withpb=True): + """check integrity of instance's repository, + using given user and password to locally connect to the repository + (no running cubicweb server needed) + """ + # yo, launch checks + if checks: + eids_cache = {} + with cnx.security_enabled(read=False, write=False): # ensure no read security + for check in checks: + check_func = globals()['check_%s' % check] + check_func(repo.schema, cnx, eids_cache, fix=fix) + if fix: + cnx.commit() + else: + print() + if not fix: + print('WARNING: Diagnostic run, nothing has been corrected') + if reindex: + cnx.rollback() + reindex_entities(repo.schema, cnx, withpb=withpb) + cnx.commit()