|
1 """Check integrity of a CubicWeb repository. Hum actually only the system database |
|
2 is checked. |
|
3 |
|
4 :organization: Logilab |
|
5 :copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
|
6 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr |
|
7 """ |
|
8 __docformat__ = "restructuredtext en" |
|
9 |
|
10 import sys |
|
11 |
|
12 from mx.DateTime import now |
|
13 from logilab.common.shellutils import ProgressBar |
|
14 |
|
def has_eid(sqlcursor, eid, eids):
    """Return True if `eid` identifies an entity that can actually be found.

    :param sqlcursor: DB-API like cursor on the system database. Up to two
      SELECT queries are executed on it, so any result set it was holding
      is lost.
    :param eid: the entity identifier to look for
    :param eids: dictionary used as a cache, mapping already checked eids to
      their boolean status. It is updated in place.
    :return: True when the eid is listed in the `entities` table and either
      belongs to a source other than 'system' (not checked any further) or
      has at least one row in the table of its entity type; False otherwise.

    A warning is written to stderr when several rows of the entity type
    table share the eid; the eid is still reported as valid in that case.
    """
    if eid in eids:
        return eids[eid]
    sqlcursor.execute('SELECT type, source FROM entities WHERE eid=%s' % eid)
    row = sqlcursor.fetchone()
    if not row:
        # no line in the entities table. Test the row explicitly instead of
        # catching the failed unpacking with a bare except, which also hid
        # genuine database errors and interrupts.
        eids[eid] = False
        return False
    etype, source = row
    if source and source != 'system':
        # XXX what to do... entities from another source can't be checked
        # here, consider them as existing
        eids[eid] = True
        return True
    sqlcursor.execute('SELECT * FROM %s WHERE eid=%s' % (etype, eid))
    result = sqlcursor.fetchall()
    if not result:
        eids[eid] = False
        return False
    if len(result) > 1:
        msg = ' More than one entity with eid %s exists in source !'
        print >> sys.stderr, msg % eid
        print >> sys.stderr, ' WARNING : Unable to fix this, do it yourself !'
    eids[eid] = True
    return True
|
40 |
|
41 # XXX move to yams? |
|
def etype_fti_containers(eschema, _done=None):
    """Yield the entity schemas under which `eschema` is full-text indexed.

    An entity schema whose `fulltext_containers()` is empty is yielded
    itself. Otherwise the entity schemas reachable through each declared
    (relation schema, role) pair are explored recursively.

    :param eschema: the entity schema to start from
    :param _done: set of entity schemas already visited, used internally by
      the recursion so that each schema is explored only once
    """
    visited = set() if _done is None else _done
    visited.add(eschema)
    links = tuple(eschema.fulltext_containers())
    if not links:
        # nothing contains this type
        yield eschema
        return
    for rschema, role in links:
        if role == 'object':
            candidates = rschema.objects(eschema)
        else:
            candidates = rschema.subjects(eschema)
        for candidate in candidates:
            if candidate not in visited:
                visited.add(candidate)
                for found in etype_fti_containers(candidate, visited):
                    yield found
|
61 |
|
def reindex_entities(schema, session):
    """Reindex all entities in the repository.

    :param schema: the schema whose entity types are scanned for indexable
      attributes
    :param session: repository session used to empty the full-text index
      table, fetch the entities and create one `FTIndexEntityOp` per entity

    The `setmtime_before_update_entity` and
    `uniquecstrcheck_before_modification` hooks are unregistered from the
    repository and are not registered again by this function.
    """
    # deactivate modification_date hook since we don't want them
    # to be updated due to the reindexation
    from cubicweb.server.hooks import (setmtime_before_update_entity,
                                       uniquecstrcheck_before_modification)
    from cubicweb.server.repository import FTIndexEntityOp
    repo = session.repo
    repo.hm.unregister_hook(setmtime_before_update_entity,
                            'before_update_entity', '')
    repo.hm.unregister_hook(uniquecstrcheck_before_modification,
                            'before_update_entity', '')
    etypes = set()
    for eschema in schema.entities():
        if eschema.is_final():
            continue
        indexable_attrs = tuple(eschema.indexable_attributes()) # generator
        if not indexable_attrs:
            continue
        etypes.update(etype_fti_containers(eschema))
    print('Reindexing entities of type %s' %
          ', '.join(sorted(str(e) for e in etypes)))
    # one step per entity type plus one for emptying the index table
    pb = ProgressBar(len(etypes) + 1)
    # first monkey patch Entity.check to disable validation
    from cubicweb.common.entity import Entity
    _check = Entity.check
    Entity.check = lambda self, creation=False: True
    try:
        # clear fti table first
        session.system_sql('DELETE FROM %s' % session.repo.system_source.dbhelper.fti_table)
        pb.update()
        # reindex entities by generating rql queries which set all indexable
        # attribute to their current value
        for eschema in etypes:
            for entity in session.execute('Any X WHERE X is %s' % eschema).entities():
                FTIndexEntityOp(session, entity=entity)
            pb.update()
    finally:
        # restore Entity.check, even when reindexing failed, so that the
        # process is not left with validation disabled
        Entity.check = _check
|
101 |
|
102 |
|
def check_schema(session):
    """Check the serialized schema for duplicated constraints.

    Constraints are counted per (relation name, subject entity type name,
    constraint type name) and an error line is printed on stdout for every
    group holding a number of constraints other than one, when the
    constraint type is one of those listed in `single_cstrs`.

    :param session: object whose `execute` method runs the RQL query and
      returns rows of (count, relation name, entity name, constraint name)
    """
    print('Checking serialized schema')
    # constraint types reported when not found exactly once on a relation
    single_cstrs = ('SizeConstraint', 'FormatConstraint',
                    'VocabularyConstraint', 'RQLConstraint',
                    'RQLVocabularyConstraint')
    # NOTE(review): the entity type is spelled 'Econstraint' in this query;
    # confirm this matches the name used by the serialized schema
    query = ('Any COUNT(X),RN,EN,ECTN GROUPBY RN,EN,ECTN ORDERBY 1 '
             'WHERE X is Econstraint, R constrained_by X, '
             'R relation_type RT, R from_entity ET, RT name RN, '
             'ET name EN, X cstrtype ECT, ECT name ECTN')
    for nb, relname, entname, cstrname in session.execute(query):
        if nb != 1 and cstrname in single_cstrs:
            print("ERROR: got %s %r constraints on relation %s.%s" % (
                nb, cstrname, entname, relname))
|
119 |
|
120 |
|
121 |
|
def check_text_index(schema, session, eids, fix=1):
    """Check that every uid of the `appears` text index table is a known eid.

    :param schema: not used by this check
    :param session: object whose `system_sql` method runs SQL queries
    :param eids: eid status cache, as handled by `has_eid`
    :param fix: when true, rows of the text index whose uid matches no
      entity are deleted; otherwise they are only reported on stderr
    """
    print('Checking text index')
    cursor = session.system_sql('SELECT uid FROM appears;')
    # rows are fetched at once since has_eid runs other queries on the cursor
    for record in cursor.fetchall():
        uid = record[0]
        if has_eid(cursor, uid, eids):
            continue
        report = ' Entity with eid %s exists in the text index but in no source'
        print >> sys.stderr, report % uid,
        if not fix:
            print >> sys.stderr
            continue
        session.system_sql('DELETE FROM appears WHERE uid=%s;' % uid)
        print >> sys.stderr, ' [FIXED]'
|
136 |
|
137 |
|
def check_entities(schema, session, eids, fix=1):
    """Check consistency between the `entities` table and entity type tables.

    First every eid of the `entities` system table is checked with
    `has_eid`, then every eid found in the table of each non final entity
    type is looked up in the status cache filled by the first step.

    :param schema: schema providing the entity types to scan
    :param session: object whose `system_sql` method runs SQL queries
    :param eids: eid status cache, as handled by `has_eid`
    :param fix: when true, offending rows are deleted; otherwise they are
      only reported on stderr
    """
    print('Checking entities system table')
    cursor = session.system_sql('SELECT eid FROM entities;')
    for record in cursor.fetchall():
        eid = record[0]
        if has_eid(cursor, eid, eids):
            continue
        report = ' Entity with eid %s exists in the system table but in no source'
        print >> sys.stderr, report % eid,
        if not fix:
            print >> sys.stderr
            continue
        session.system_sql('DELETE FROM entities WHERE eid=%s;' % eid)
        print >> sys.stderr, ' [FIXED]'
    print('Checking entities tables')
    for eschema in schema.entities():
        if eschema.is_final():
            continue
        cursor = session.system_sql('SELECT eid FROM %s;' % eschema.type)
        for record in cursor.fetchall():
            eid = record[0]
            # eids is full since we have fetched everything from the entities
            # table, no need to call has_eid
            if eid in eids and eids[eid]:
                continue
            report = ' Entity with eid %s exists in the %s table but not in the system table'
            print >> sys.stderr, report % (eid, eschema.type),
            if not fix:
                print >> sys.stderr
                continue
            session.system_sql('DELETE FROM %s WHERE eid=%s;' % (eschema.type, eid))
            print >> sys.stderr, ' [FIXED]'
|
169 |
|
170 |
|
def bad_related_msg(rtype, target, eid, fix):
    """Report on stderr a relation whose subject or object does not exist.

    :param rtype: name of the relation
    :param target: end of the relation which is missing, inserted as is in
      the message (callers in this module give 'subject' or 'object')
    :param eid: eid of the missing entity
    :param fix: when true the line is ended with a ' [FIXED]' marker,
      otherwise with a bare newline; nothing is fixed here
    """
    template = ' A relation %s with %s eid %s exists but no such entity in sources'
    print >> sys.stderr, template % (rtype, target, eid),
    if not fix:
        print >> sys.stderr
        return
    print >> sys.stderr, ' [FIXED]'
|
178 |
|
179 |
|
def check_relations(schema, session, eids, fix=1):
    """Check all relations registered in the repo system table.

    For each non final relation other than `identity`, every eid referenced
    by the relation is checked with `has_eid`:

    * inlined relations are read from the column named after the relation
      in the table of each subject type,
    * other relations are read from the `eid_from` then `eid_to` columns of
      the `<rtype>_relation` table.

    :param schema: schema providing the relations to scan
    :param session: object whose `system_sql` method runs SQL queries
    :param eids: eid status cache, as handled by `has_eid`
    :param fix: when true, dangling references are removed (column set to
      NULL for inlined relations, rows deleted otherwise); otherwise they
      are only reported through `bad_related_msg`
    """
    print('Checking relations')
    for rschema in schema.relations():
        if rschema.is_final():
            continue
        rtype = rschema.type
        if rtype == 'identity':
            continue
        if rschema.inlined:
            for subjtype in rschema.subjects():
                cursor = session.system_sql('SELECT %s FROM %s WHERE %s IS NOT NULL;'
                                            % (rtype, subjtype, rtype))
                for row in cursor.fetchall():
                    eid = row[0]
                    if not has_eid(cursor, eid, eids):
                        bad_related_msg(rtype, 'object', eid, fix)
                        if fix:
                            # eid is the missing *object*: filter on the
                            # relation column, not on the subject's own eid
                            session.system_sql('UPDATE %s SET %s = NULL WHERE %s=%s;'
                                               % (subjtype, rtype, rtype, eid))
            continue
        table = '%s_relation' % rtype
        # same treatment for both ends of the relation, subjects first
        for column, target in (('eid_from', 'subject'), ('eid_to', 'object')):
            cursor = session.system_sql('SELECT %s FROM %s;' % (column, table))
            for row in cursor.fetchall():
                eid = row[0]
                if not has_eid(cursor, eid, eids):
                    bad_related_msg(rtype, target, eid, fix)
                    if fix:
                        # delete from the very table that was read
                        session.system_sql('DELETE FROM %s WHERE %s=%s;'
                                           % (table, column, eid))
|
216 |
|
217 |
|
def check_metadata(schema, session, eids, fix=1):
    """Check that entities have the required metadata.

    Two checks are done:

    * for each entity type listed in the `entities` table, rows of the
      type's table with a NULL `creation_date` or `modification_date`,
    * rows of the `entities` table with no matching `eid_from` in the
      `owned_by_relation` table.

    :param schema: not used by this check
    :param session: object whose `system_sql` method runs SQL queries
    :param eids: not used by this check
    :param fix: when true, missing dates are set to the current date and
      missing owners to the user with the lowest eid; otherwise problems
      are only reported on stderr
    :raise AssertionError: if the `euser` table is empty

    FIXME: rewrite using RQL queries ?
    """
    print('Checking metadata')
    cursor = session.system_sql("SELECT DISTINCT type FROM entities;")
    for etype, in cursor.fetchall():
        for rel, default in ( ('creation_date', now()),
                              ('modification_date', now()), ):
            cursor = session.system_sql("SELECT eid FROM %s "
                                        "WHERE %s is NULL" % (etype, rel))
            for eid, in cursor.fetchall():
                msg = ' %s with eid %s has no %s'
                print >> sys.stderr, msg % (etype, eid, rel),
                if fix:
                    # %%(default)s must survive the python formatting below
                    # so that the value is given as a query argument;
                    # unescaped, mixing it with positional %s raises
                    # "TypeError: format requires a mapping"
                    session.system_sql("UPDATE %s SET %s=%%(default)s WHERE eid=%s ;"
                                       % (etype, rel, eid), {'default': default})
                    print >> sys.stderr, ' [FIXED]'
                else:
                    print >> sys.stderr
    cursor = session.system_sql('SELECT MIN(eid) FROM euser;')
    default_user_eid = cursor.fetchone()[0]
    assert default_user_eid is not None, 'no user defined !'
    for rel, default in ( ('owned_by', default_user_eid), ):
        cursor = session.system_sql("SELECT eid, type FROM entities "
                                    "WHERE NOT EXISTS "
                                    "(SELECT 1 FROM %s_relation WHERE eid_from=eid);"
                                    % rel)
        for eid, etype in cursor.fetchall():
            msg = ' %s with eid %s has no %s relation'
            print >> sys.stderr, msg % (etype, eid, rel),
            if fix:
                session.system_sql('INSERT INTO %s_relation VALUES (%s, %s) ;'
                                   % (rel, eid, default))
                print >> sys.stderr, ' [FIXED]'
            else:
                print >> sys.stderr
|
256 |
|
257 |
|
def check(repo, cnx, checks, reindex, fix):
    """Check integrity of application's repository,
    using given user and password to locally connect to the repository
    (no running cubicweb server needed)

    :param repo: the repository; its schema and the session matching the
      connection are used
    :param cnx: connection to the repository, committed or rolled back here
    :param checks: names of the checks to run; each name `xxx` is resolved
      to the module level function `check_xxx`
    :param reindex: when true, entities are reindexed once checks are done
    :param fix: when true, checks repair what they can and the connection
      is committed; otherwise a diagnostic warning is printed
    :raise KeyError: if a name in `checks` matches no `check_xxx` function
    """
    session = repo._get_session(cnx.sessionid, setpool=True)
    # yo, launch checks
    if checks:
        eids_cache = {}
        # don't call the loop variable `check`, it would hide this function
        for name in checks:
            check_func = globals()['check_%s' % name]
            if name == 'schema':
                # check_schema only takes the session; giving it the
                # arguments of the other checks raises a TypeError
                check_func(session)
            else:
                check_func(repo.schema, session, eids_cache, fix=fix)
        if fix:
            cnx.commit()
        else:
            print('')
            print('WARNING: Diagnostic run, nothing has been corrected')
    if reindex:
        cnx.rollback()
        session.set_pool()
        reindex_entities(repo.schema, session)
        cnx.commit()