"""Check integrity of a CubicWeb repository. Hum actually only the system databaseis checked.:organization: Logilab:copyright: 2001-2009 LOGILAB S.A. (Paris, FRANCE), license is LGPL v2.:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr:license: GNU Lesser General Public License, v2.1 - http://www.gnu.org/licenses"""__docformat__="restructuredtext en"importsysfromdatetimeimportdatetimefromlogilab.common.shellutilsimportProgressBarfromcubicweb.schemaimportPURE_VIRTUAL_RTYPESfromcubicweb.server.sqlutilsimportSQL_PREFIXdefhas_eid(sqlcursor,eid,eids):"""return true if the eid is a valid eid"""ifeids.has_key(eid):returneids[eid]sqlcursor.execute('SELECT type, source FROM entities WHERE eid=%s'%eid)try:etype,source=sqlcursor.fetchone()except:eids[eid]=FalsereturnFalseifsourceandsource!='system':# XXX what to do...eids[eid]=TruereturnTruesqlcursor.execute('SELECT * FROM %s%s WHERE %seid=%s'%(SQL_PREFIX,etype,SQL_PREFIX,eid))result=sqlcursor.fetchall()iflen(result)==0:eids[eid]=FalsereturnFalseeliflen(result)>1:msg=' More than one entity with eid %s exists in source !'print>>sys.stderr,msg%eidprint>>sys.stderr,' WARNING : Unable to fix this, do it yourself !'eids[eid]=TruereturnTrue# XXX move to yams?defetype_fti_containers(eschema,_done=None):if_doneisNone:_done=set()_done.add(eschema)containers=tuple(eschema.fulltext_containers())ifcontainers:forrschema,targetincontainers:iftarget=='object':targets=rschema.objects(eschema)else:targets=rschema.subjects(eschema)fortargeteschemaintargets:iftargeteschemain_done:continue_done.add(targeteschema)forcontainerinetype_fti_containers(targeteschema,_done):yieldcontainerelse:yieldeschemadefreindex_entities(schema,session):"""reindex all entities in the repository"""# deactivate modification_date hook since we don't want them# to be updated due to the reindexationfromcubicweb.server.hooksimport(setmtime_before_update_entity,uniquecstrcheck_before_modification)fromcubicweb.server.repositoryimportFTIndexEntityOprepo=session.repocursor=session.pool['system']ifnotrepo.system_source.indexer.has_fti_table(cursor):fromindexerimportget_indexerprint'no text index table'indexer=get_indexer(repo.system_source.dbdriver)# XXX indexer.init_fti(cursor) once index 0.7 is outindexer.init_extensions(cursor)cursor.execute(indexer.sql_init_fti())repo.hm.unregister_hook(setmtime_before_update_entity,'before_update_entity','')repo.hm.unregister_hook(uniquecstrcheck_before_modification,'before_update_entity','')repo.do_fti=True# ensure full-text indexation is activatedetypes=set()foreschemainschema.entities():ifeschema.is_final():continueindexable_attrs=tuple(eschema.indexable_attributes())# generatorifnotindexable_attrs:continueforcontainerinetype_fti_containers(eschema):etypes.add(container)print'Reindexing entities of type %s'% \', '.join(sorted(str(e)foreinetypes))pb=ProgressBar(len(etypes)+1)# first monkey patch Entity.check to disable validationfromcubicweb.entityimportEntity_check=Entity.checkEntity.check=lambdaself,creation=False:True# clear fti table firstsession.system_sql('DELETE FROM %s'%session.repo.system_source.dbhelper.fti_table)pb.update()# reindex entities by generating rql queries which set all indexable# attribute to their current valueforeschemainetypes:forentityinsession.execute('Any X WHERE X is %s'%eschema).entities():FTIndexEntityOp(session,entity=entity)pb.update()# restore Entity.checkEntity.check=_checkdefcheck_schema(schema,session,eids,fix=1):"""check serialized schema"""print'Checking serialized schema'unique_constraints=('SizeConstraint','FormatConstraint','VocabularyConstraint','RQLConstraint','RQLVocabularyConstraint')rql=('Any COUNT(X),RN,EN,ECTN GROUPBY RN,EN,ECTN ORDERBY 1 ''WHERE X is CWConstraint, R constrained_by X, ''R relation_type RT, R from_entity ET, RT name RN, ''ET name EN, X cstrtype ECT, ECT name ECTN')forcount,rn,en,cstrnameinsession.execute(rql):ifcount==1:continueifcstrnameinunique_constraints:print"ERROR: got %s%r constraints on relation %s.%s"%(count,cstrname,en,rn)defcheck_text_index(schema,session,eids,fix=1):"""check all entities registered in the text index"""print'Checking text index'cursor=session.system_sql('SELECT uid FROM appears;')forrowincursor.fetchall():eid=row[0]ifnothas_eid(cursor,eid,eids):msg=' Entity with eid %s exists in the text index but in no source'print>>sys.stderr,msg%eid,iffix:session.system_sql('DELETE FROM appears WHERE uid=%s;'%eid)print>>sys.stderr,' [FIXED]'else:print>>sys.stderrdefcheck_entities(schema,session,eids,fix=1):"""check all entities registered in the repo system table"""print'Checking entities system table'cursor=session.system_sql('SELECT eid FROM entities;')forrowincursor.fetchall():eid=row[0]ifnothas_eid(cursor,eid,eids):msg=' Entity with eid %s exists in the system table but in no source'print>>sys.stderr,msg%eid,iffix:session.system_sql('DELETE FROM entities WHERE eid=%s;'%eid)print>>sys.stderr,' [FIXED]'else:print>>sys.stderrprint'Checking entities tables'foreschemainschema.entities():ifeschema.is_final():continuetable=SQL_PREFIX+eschema.typecolumn=SQL_PREFIX+'eid'cursor=session.system_sql('SELECT %s FROM %s;'%(column,table))forrowincursor.fetchall():eid=row[0]# eids is full since we have fetched everyting from the entities table,# no need to call has_eidifnoteidineidsornoteids[eid]:msg=' Entity with eid %s exists in the %s table but not in the system table'print>>sys.stderr,msg%(eid,eschema.type),iffix:session.system_sql('DELETE FROM %s WHERE %s=%s;'%(table,column,eid))print>>sys.stderr,' [FIXED]'else:print>>sys.stderrdefbad_related_msg(rtype,target,eid,fix):msg=' A relation %s with %s eid %s exists but no such entity in sources'print>>sys.stderr,msg%(rtype,target,eid),iffix:print>>sys.stderr,' [FIXED]'else:print>>sys.stderrdefcheck_relations(schema,session,eids,fix=1):"""check all relations registered in the repo system table"""print'Checking relations'forrschemainschema.relations():ifrschema.is_final()orrschemainPURE_VIRTUAL_RTYPES:continueifrschema.inlined:forsubjtypeinrschema.subjects():table=SQL_PREFIX+str(subjtype)column=SQL_PREFIX+str(rschema)sql='SELECT %s FROM %s WHERE %s IS NOT NULL;'%(column,table,column)cursor=session.system_sql(sql)forrowincursor.fetchall():eid=row[0]ifnothas_eid(cursor,eid,eids):bad_related_msg(rschema,'object',eid,fix)iffix:sql='UPDATE %s SET %s = NULL WHERE %seid=%s;'%(table,column,SQL_PREFIX,eid)session.system_sql(sql)continuecursor=session.system_sql('SELECT eid_from FROM %s_relation;'%rschema)forrowincursor.fetchall():eid=row[0]ifnothas_eid(cursor,eid,eids):bad_related_msg(rschema,'subject',eid,fix)iffix:sql='DELETE FROM %s_relation WHERE eid_from=%s;'%(rschema,eid)session.system_sql(sql)cursor=session.system_sql('SELECT eid_to FROM %s_relation;'%rschema)forrowincursor.fetchall():eid=row[0]ifnothas_eid(cursor,eid,eids):bad_related_msg(rschema,'object',eid,fix)iffix:sql='DELETE FROM %s_relation WHERE eid_to=%s;'%(rschema,eid)session.system_sql(sql)defcheck_metadata(schema,session,eids,fix=1):"""check entities has required metadata FIXME: rewrite using RQL queries ? """print'Checking metadata'cursor=session.system_sql("SELECT DISTINCT type FROM entities;")eidcolumn=SQL_PREFIX+'eid'foretype,incursor.fetchall():table=SQL_PREFIX+etypeforrel,defaultin(('creation_date',datetime.now()),('modification_date',datetime.now()),):column=SQL_PREFIX+relcursor=session.system_sql("SELECT %s FROM %s WHERE %s is NULL"%(eidcolumn,table,column))foreid,incursor.fetchall():msg=' %s with eid %s has no %s'print>>sys.stderr,msg%(etype,eid,rel),iffix:session.system_sql("UPDATE %s SET %s=%%(v)s WHERE %s=%s ;"%(table,column,eidcolumn,eid),{'v':default})print>>sys.stderr,' [FIXED]'else:print>>sys.stderrcursor=session.system_sql('SELECT MIN(%s) FROM %sCWUser;'%(eidcolumn,SQL_PREFIX))default_user_eid=cursor.fetchone()[0]assertdefault_user_eidisnotNone,'no user defined !'forrel,defaultin(('owned_by',default_user_eid),):cursor=session.system_sql("SELECT eid, type FROM entities ""WHERE NOT EXISTS ""(SELECT 1 FROM %s_relation WHERE eid_from=eid);"%rel)foreid,etypeincursor.fetchall():msg=' %s with eid %s has no %s relation'print>>sys.stderr,msg%(etype,eid,rel),iffix:session.system_sql('INSERT INTO %s_relation VALUES (%s, %s) ;'%(rel,eid,default))print>>sys.stderr,' [FIXED]'else:print>>sys.stderrdefcheck(repo,cnx,checks,reindex,fix):"""check integrity of instance's repository, using given user and password to locally connect to the repository (no running cubicweb server needed) """session=repo._get_session(cnx.sessionid,setpool=True)# yo, launch checksifchecks:eids_cache={}forcheckinchecks:check_func=globals()['check_%s'%check]check_func(repo.schema,session,eids_cache,fix=fix)iffix:cnx.commit()else:printifnotfix:print'WARNING: Diagnostic run, nothing has been corrected'ifreindex:cnx.rollback()session.set_pool()reindex_entities(repo.schema,session)cnx.commit()