--- a/server/checkintegrity.py Mon Jan 04 18:40:30 2016 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,410 +0,0 @@
-# copyright 2003-2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
-# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
-#
-# This file is part of CubicWeb.
-#
-# CubicWeb is free software: you can redistribute it and/or modify it under the
-# terms of the GNU Lesser General Public License as published by the Free
-# Software Foundation, either version 2.1 of the License, or (at your option)
-# any later version.
-#
-# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
-# details.
-#
-# You should have received a copy of the GNU Lesser General Public License along
-# with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
-"""Integrity checking tool for instances:
-
-* integrity of a CubicWeb repository. Hum actually only the system database is
- checked.
-"""
-from __future__ import print_function
-
-__docformat__ = "restructuredtext en"
-
-import sys
-from datetime import datetime
-
-from logilab.common.shellutils import ProgressBar
-
-from cubicweb.schema import PURE_VIRTUAL_RTYPES, VIRTUAL_RTYPES, UNIQUE_CONSTRAINTS
-from cubicweb.server.sqlutils import SQL_PREFIX
-
-def notify_fixed(fix):
- if fix:
- sys.stderr.write(' [FIXED]')
- sys.stderr.write('\n')
-
-def has_eid(cnx, sqlcursor, eid, eids):
- """return true if the eid is a valid eid"""
- if eid in eids:
- return eids[eid]
- sqlcursor.execute('SELECT type FROM entities WHERE eid=%s' % eid)
- try:
- etype = sqlcursor.fetchone()[0]
- except Exception:
- eids[eid] = False
- return False
- if etype not in cnx.vreg.schema:
- eids[eid] = False
- return False
- sqlcursor.execute('SELECT * FROM %s%s WHERE %seid=%s' % (SQL_PREFIX, etype,
- SQL_PREFIX, eid))
- result = sqlcursor.fetchall()
- if len(result) == 0:
- eids[eid] = False
- return False
- elif len(result) > 1:
- msg = (' More than one entity with eid %s exists in source!\n'
- ' WARNING : Unable to fix this, do it yourself!\n')
- sys.stderr.write(msg % eid)
- eids[eid] = True
- return True
-
-# XXX move to yams?
-def etype_fti_containers(eschema, _done=None):
- if _done is None:
- _done = set()
- _done.add(eschema)
- containers = tuple(eschema.fulltext_containers())
- if containers:
- for rschema, target in containers:
- if target == 'object':
- targets = rschema.objects(eschema)
- else:
- targets = rschema.subjects(eschema)
- for targeteschema in targets:
- if targeteschema in _done:
- continue
- _done.add(targeteschema)
- for container in etype_fti_containers(targeteschema, _done):
- yield container
- else:
- yield eschema
-
-def reindex_entities(schema, cnx, withpb=True, etypes=None):
- """reindex all entities in the repository"""
- # deactivate modification_date hook since we don't want them
- # to be updated due to the reindexation
- repo = cnx.repo
- dbhelper = repo.system_source.dbhelper
- cursor = cnx.cnxset.cu
- if not dbhelper.has_fti_table(cursor):
- print('no text index table')
- dbhelper.init_fti(cursor)
- repo.system_source.do_fti = True # ensure full-text indexation is activated
- if etypes is None:
- print('Reindexing entities')
- etypes = set()
- for eschema in schema.entities():
- if eschema.final:
- continue
- indexable_attrs = tuple(eschema.indexable_attributes()) # generator
- if not indexable_attrs:
- continue
- for container in etype_fti_containers(eschema):
- etypes.add(container)
- # clear fti table first
- cnx.system_sql('DELETE FROM %s' % dbhelper.fti_table)
- else:
- print('Reindexing entities of type %s' % \
- ', '.join(sorted(str(e) for e in etypes)))
- # clear fti table first. Use subquery for sql compatibility
- cnx.system_sql("DELETE FROM %s WHERE EXISTS(SELECT 1 FROM ENTITIES "
- "WHERE eid=%s AND type IN (%s))" % (
- dbhelper.fti_table, dbhelper.fti_uid_attr,
- ','.join("'%s'" % etype for etype in etypes)))
- if withpb:
- pb = ProgressBar(len(etypes) + 1)
- pb.update()
- # reindex entities by generating rql queries which set all indexable
- # attribute to their current value
- source = repo.system_source
- for eschema in etypes:
- etype_class = cnx.vreg['etypes'].etype_class(str(eschema))
- for rset in etype_class.cw_fti_index_rql_limit(cnx):
- source.fti_index_entities(cnx, rset.entities())
- # clear entity cache to avoid high memory consumption on big tables
- cnx.drop_entity_cache()
- if withpb:
- pb.update()
- if withpb:
- pb.finish()
-
-
-def check_schema(schema, cnx, eids, fix=1):
- """check serialized schema"""
- print('Checking serialized schema')
- rql = ('Any COUNT(X),RN,SN,ON,CTN GROUPBY RN,SN,ON,CTN ORDERBY 1 '
- 'WHERE X is CWConstraint, R constrained_by X, '
- 'R relation_type RT, RT name RN, R from_entity ST, ST name SN, '
- 'R to_entity OT, OT name ON, X cstrtype CT, CT name CTN')
- for count, rn, sn, on, cstrname in cnx.execute(rql):
- if count == 1:
- continue
- if cstrname in UNIQUE_CONSTRAINTS:
- print("ERROR: got %s %r constraints on relation %s.%s.%s" % (
- count, cstrname, sn, rn, on))
- if fix:
- print('dunno how to fix, do it yourself')
-
-
-
-def check_text_index(schema, cnx, eids, fix=1):
- """check all entities registered in the text index"""
- print('Checking text index')
- msg = ' Entity with eid %s exists in the text index but in no source (autofix will remove from text index)'
- cursor = cnx.system_sql('SELECT uid FROM appears;')
- for row in cursor.fetchall():
- eid = row[0]
- if not has_eid(cnx, cursor, eid, eids):
- sys.stderr.write(msg % eid)
- if fix:
- cnx.system_sql('DELETE FROM appears WHERE uid=%s;' % eid)
- notify_fixed(fix)
-
-
-def check_entities(schema, cnx, eids, fix=1):
- """check all entities registered in the repo system table"""
- print('Checking entities system table')
- # system table but no source
- msg = ' Entity %s with eid %s exists in the system table but in no source (autofix will delete the entity)'
- cursor = cnx.system_sql('SELECT eid,type FROM entities;')
- for row in cursor.fetchall():
- eid, etype = row
- if not has_eid(cnx, cursor, eid, eids):
- sys.stderr.write(msg % (etype, eid))
- if fix:
- cnx.system_sql('DELETE FROM entities WHERE eid=%s;' % eid)
- notify_fixed(fix)
- # source in entities, but no relation cw_source
- # XXX this (get_versions) requires a second connection to the db when we already have one open
- applcwversion = cnx.repo.get_versions().get('cubicweb')
- if applcwversion >= (3, 13, 1): # entities.asource appeared in 3.13.1
- cursor = cnx.system_sql('SELECT e.eid FROM entities as e, cw_CWSource as s '
- 'WHERE s.cw_name=e.asource AND '
- 'NOT EXISTS(SELECT 1 FROM cw_source_relation as cs '
- ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid) '
- 'ORDER BY e.eid')
- msg = (' Entity with eid %s refers to source in entities table, '
- 'but is missing relation cw_source (autofix will create the relation)\n')
- for row in cursor.fetchall():
- sys.stderr.write(msg % row[0])
- if fix:
- cnx.system_sql('INSERT INTO cw_source_relation (eid_from, eid_to) '
- 'SELECT e.eid, s.cw_eid FROM entities as e, cw_CWSource as s '
- 'WHERE s.cw_name=e.asource AND NOT EXISTS(SELECT 1 FROM cw_source_relation as cs '
- ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid)')
- notify_fixed(True)
- # inconsistencies for 'is'
- msg = ' %s #%s is missing relation "is" (autofix will create the relation)\n'
- cursor = cnx.system_sql('SELECT e.type, e.eid FROM entities as e, cw_CWEType as s '
- 'WHERE s.cw_name=e.type AND NOT EXISTS(SELECT 1 FROM is_relation as cs '
- ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid) '
- 'ORDER BY e.eid')
- for row in cursor.fetchall():
- sys.stderr.write(msg % tuple(row))
- if fix:
- cnx.system_sql('INSERT INTO is_relation (eid_from, eid_to) '
- 'SELECT e.eid, s.cw_eid FROM entities as e, cw_CWEType as s '
- 'WHERE s.cw_name=e.type AND NOT EXISTS(SELECT 1 FROM is_relation as cs '
- ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid)')
- notify_fixed(True)
- # inconsistencies for 'is_instance_of'
- msg = ' %s #%s is missing relation "is_instance_of" (autofix will create the relation)\n'
- cursor = cnx.system_sql('SELECT e.type, e.eid FROM entities as e, cw_CWEType as s '
- 'WHERE s.cw_name=e.type AND NOT EXISTS(SELECT 1 FROM is_instance_of_relation as cs '
- ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid) '
- 'ORDER BY e.eid')
- for row in cursor.fetchall():
- sys.stderr.write(msg % tuple(row))
- if fix:
- cnx.system_sql('INSERT INTO is_instance_of_relation (eid_from, eid_to) '
- 'SELECT e.eid, s.cw_eid FROM entities as e, cw_CWEType as s '
- 'WHERE s.cw_name=e.type AND NOT EXISTS(SELECT 1 FROM is_instance_of_relation as cs '
- ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid)')
- notify_fixed(True)
- print('Checking entities tables')
- msg = ' Entity with eid %s exists in the %s table but not in the system table (autofix will delete the entity)'
- for eschema in schema.entities():
- if eschema.final:
- continue
- table = SQL_PREFIX + eschema.type
- column = SQL_PREFIX + 'eid'
- cursor = cnx.system_sql('SELECT %s FROM %s;' % (column, table))
- for row in cursor.fetchall():
- eid = row[0]
- # eids is full since we have fetched everything from the entities table,
- # no need to call has_eid
- if not eid in eids or not eids[eid]:
- sys.stderr.write(msg % (eid, eschema.type))
- if fix:
- cnx.system_sql('DELETE FROM %s WHERE %s=%s;' % (table, column, eid))
- notify_fixed(fix)
-
-
-def bad_related_msg(rtype, target, eid, fix):
- msg = ' A relation %s with %s eid %s exists but no such entity in sources'
- sys.stderr.write(msg % (rtype, target, eid))
- notify_fixed(fix)
-
-def bad_inlined_msg(rtype, parent_eid, eid, fix):
- msg = (' An inlined relation %s from %s to %s exists but the latter '
- 'entity does not exist')
- sys.stderr.write(msg % (rtype, parent_eid, eid))
- notify_fixed(fix)
-
-
-def check_relations(schema, cnx, eids, fix=1):
- """check that eids referenced by relations are registered in the repo system
- table
- """
- print('Checking relations')
- for rschema in schema.relations():
- if rschema.final or rschema.type in PURE_VIRTUAL_RTYPES:
- continue
- if rschema.inlined:
- for subjtype in rschema.subjects():
- table = SQL_PREFIX + str(subjtype)
- column = SQL_PREFIX + str(rschema)
- sql = 'SELECT cw_eid,%s FROM %s WHERE %s IS NOT NULL;' % (
- column, table, column)
- cursor = cnx.system_sql(sql)
- for row in cursor.fetchall():
- parent_eid, eid = row
- if not has_eid(cnx, cursor, eid, eids):
- bad_inlined_msg(rschema, parent_eid, eid, fix)
- if fix:
- sql = 'UPDATE %s SET %s=NULL WHERE %s=%s;' % (
- table, column, column, eid)
- cnx.system_sql(sql)
- continue
- try:
- cursor = cnx.system_sql('SELECT eid_from FROM %s_relation;' % rschema)
- except Exception as ex:
- # usually because table doesn't exist
- print('ERROR', ex)
- continue
- for row in cursor.fetchall():
- eid = row[0]
- if not has_eid(cnx, cursor, eid, eids):
- bad_related_msg(rschema, 'subject', eid, fix)
- if fix:
- sql = 'DELETE FROM %s_relation WHERE eid_from=%s;' % (
- rschema, eid)
- cnx.system_sql(sql)
- cursor = cnx.system_sql('SELECT eid_to FROM %s_relation;' % rschema)
- for row in cursor.fetchall():
- eid = row[0]
- if not has_eid(cnx, cursor, eid, eids):
- bad_related_msg(rschema, 'object', eid, fix)
- if fix:
- sql = 'DELETE FROM %s_relation WHERE eid_to=%s;' % (
- rschema, eid)
- cnx.system_sql(sql)
-
-
-def check_mandatory_relations(schema, cnx, eids, fix=1):
- """check entities missing some mandatory relation"""
- print('Checking mandatory relations')
- msg = '%s #%s is missing mandatory %s relation %s (autofix will delete the entity)'
- for rschema in schema.relations():
- if rschema.final or rschema in PURE_VIRTUAL_RTYPES or rschema in ('is', 'is_instance_of'):
- continue
- smandatory = set()
- omandatory = set()
- for rdef in rschema.rdefs.values():
- if rdef.cardinality[0] in '1+':
- smandatory.add(rdef.subject)
- if rdef.cardinality[1] in '1+':
- omandatory.add(rdef.object)
- for role, etypes in (('subject', smandatory), ('object', omandatory)):
- for etype in etypes:
- if role == 'subject':
- rql = 'Any X WHERE NOT X %s Y, X is %s' % (rschema, etype)
- else:
- rql = 'Any X WHERE NOT Y %s X, X is %s' % (rschema, etype)
- for entity in cnx.execute(rql).entities():
- sys.stderr.write(msg % (entity.cw_etype, entity.eid, role, rschema))
- if fix:
- #if entity.cw_describe()['source']['uri'] == 'system': XXX
- entity.cw_delete() # XXX this is BRUTAL!
- notify_fixed(fix)
-
-
-def check_mandatory_attributes(schema, cnx, eids, fix=1):
- """check for entities stored in the system source missing some mandatory
- attribute
- """
- print('Checking mandatory attributes')
- msg = '%s #%s is missing mandatory attribute %s (autofix will delete the entity)'
- for rschema in schema.relations():
- if not rschema.final or rschema in VIRTUAL_RTYPES:
- continue
- for rdef in rschema.rdefs.values():
- if rdef.cardinality[0] in '1+':
- rql = 'Any X WHERE X %s NULL, X is %s, X cw_source S, S name "system"' % (
- rschema, rdef.subject)
- for entity in cnx.execute(rql).entities():
- sys.stderr.write(msg % (entity.cw_etype, entity.eid, rschema))
- if fix:
- entity.cw_delete()
- notify_fixed(fix)
-
-
-def check_metadata(schema, cnx, eids, fix=1):
- """check entities has required metadata
-
- FIXME: rewrite using RQL queries ?
- """
- print('Checking metadata')
- cursor = cnx.system_sql("SELECT DISTINCT type FROM entities;")
- eidcolumn = SQL_PREFIX + 'eid'
- msg = ' %s with eid %s has no %s (autofix will set it to now)'
- for etype, in cursor.fetchall():
- if etype not in cnx.vreg.schema:
- sys.stderr.write('entities table references unknown type %s\n' %
- etype)
- if fix:
- cnx.system_sql("DELETE FROM entities WHERE type = %(type)s",
- {'type': etype})
- continue
- table = SQL_PREFIX + etype
- for rel, default in ( ('creation_date', datetime.utcnow()),
- ('modification_date', datetime.utcnow()), ):
- column = SQL_PREFIX + rel
- cursor = cnx.system_sql("SELECT %s FROM %s WHERE %s is NULL"
- % (eidcolumn, table, column))
- for eid, in cursor.fetchall():
- sys.stderr.write(msg % (etype, eid, rel))
- if fix:
- cnx.system_sql("UPDATE %s SET %s=%%(v)s WHERE %s=%s ;"
- % (table, column, eidcolumn, eid),
- {'v': default})
- notify_fixed(fix)
-
-
-def check(repo, cnx, checks, reindex, fix, withpb=True):
- """check integrity of instance's repository,
- using given user and password to locally connect to the repository
- (no running cubicweb server needed)
- """
- # yo, launch checks
- if checks:
- eids_cache = {}
- with cnx.security_enabled(read=False, write=False): # ensure no read security
- for check in checks:
- check_func = globals()['check_%s' % check]
- check_func(repo.schema, cnx, eids_cache, fix=fix)
- if fix:
- cnx.commit()
- else:
- print()
- if not fix:
- print('WARNING: Diagnostic run, nothing has been corrected')
- if reindex:
- cnx.rollback()
- reindex_entities(repo.schema, cnx, withpb=withpb)
- cnx.commit()