|
1 # copyright 2003-2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
|
2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
|
3 # |
|
4 # This file is part of CubicWeb. |
|
5 # |
|
6 # CubicWeb is free software: you can redistribute it and/or modify it under the |
|
7 # terms of the GNU Lesser General Public License as published by the Free |
|
8 # Software Foundation, either version 2.1 of the License, or (at your option) |
|
9 # any later version. |
|
10 # |
|
11 # CubicWeb is distributed in the hope that it will be useful, but WITHOUT |
|
12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
13 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
|
14 # details. |
|
15 # |
|
16 # You should have received a copy of the GNU Lesser General Public License along |
|
17 # with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
|
18 """Integrity checking tool for instances: |
|
19 |
|
20 * integrity of a CubicWeb repository. Hum actually only the system database is |
|
21 checked. |
|
22 """ |
|
23 from __future__ import print_function |
|
24 |
|
25 __docformat__ = "restructuredtext en" |
|
26 |
|
27 import sys |
|
28 from datetime import datetime |
|
29 |
|
30 from logilab.common.shellutils import ProgressBar |
|
31 |
|
32 from cubicweb.schema import PURE_VIRTUAL_RTYPES, VIRTUAL_RTYPES, UNIQUE_CONSTRAINTS |
|
33 from cubicweb.server.sqlutils import SQL_PREFIX |
|
34 |
|
35 def notify_fixed(fix): |
|
36 if fix: |
|
37 sys.stderr.write(' [FIXED]') |
|
38 sys.stderr.write('\n') |
|
39 |
|
40 def has_eid(cnx, sqlcursor, eid, eids): |
|
41 """return true if the eid is a valid eid""" |
|
42 if eid in eids: |
|
43 return eids[eid] |
|
44 sqlcursor.execute('SELECT type FROM entities WHERE eid=%s' % eid) |
|
45 try: |
|
46 etype = sqlcursor.fetchone()[0] |
|
47 except Exception: |
|
48 eids[eid] = False |
|
49 return False |
|
50 if etype not in cnx.vreg.schema: |
|
51 eids[eid] = False |
|
52 return False |
|
53 sqlcursor.execute('SELECT * FROM %s%s WHERE %seid=%s' % (SQL_PREFIX, etype, |
|
54 SQL_PREFIX, eid)) |
|
55 result = sqlcursor.fetchall() |
|
56 if len(result) == 0: |
|
57 eids[eid] = False |
|
58 return False |
|
59 elif len(result) > 1: |
|
60 msg = (' More than one entity with eid %s exists in source!\n' |
|
61 ' WARNING : Unable to fix this, do it yourself!\n') |
|
62 sys.stderr.write(msg % eid) |
|
63 eids[eid] = True |
|
64 return True |
|
65 |
|
66 # XXX move to yams? |
|
67 def etype_fti_containers(eschema, _done=None): |
|
68 if _done is None: |
|
69 _done = set() |
|
70 _done.add(eschema) |
|
71 containers = tuple(eschema.fulltext_containers()) |
|
72 if containers: |
|
73 for rschema, target in containers: |
|
74 if target == 'object': |
|
75 targets = rschema.objects(eschema) |
|
76 else: |
|
77 targets = rschema.subjects(eschema) |
|
78 for targeteschema in targets: |
|
79 if targeteschema in _done: |
|
80 continue |
|
81 _done.add(targeteschema) |
|
82 for container in etype_fti_containers(targeteschema, _done): |
|
83 yield container |
|
84 else: |
|
85 yield eschema |
|
86 |
|
87 def reindex_entities(schema, cnx, withpb=True, etypes=None): |
|
88 """reindex all entities in the repository""" |
|
89 # deactivate modification_date hook since we don't want them |
|
90 # to be updated due to the reindexation |
|
91 repo = cnx.repo |
|
92 dbhelper = repo.system_source.dbhelper |
|
93 cursor = cnx.cnxset.cu |
|
94 if not dbhelper.has_fti_table(cursor): |
|
95 print('no text index table') |
|
96 dbhelper.init_fti(cursor) |
|
97 repo.system_source.do_fti = True # ensure full-text indexation is activated |
|
98 if etypes is None: |
|
99 print('Reindexing entities') |
|
100 etypes = set() |
|
101 for eschema in schema.entities(): |
|
102 if eschema.final: |
|
103 continue |
|
104 indexable_attrs = tuple(eschema.indexable_attributes()) # generator |
|
105 if not indexable_attrs: |
|
106 continue |
|
107 for container in etype_fti_containers(eschema): |
|
108 etypes.add(container) |
|
109 # clear fti table first |
|
110 cnx.system_sql('DELETE FROM %s' % dbhelper.fti_table) |
|
111 else: |
|
112 print('Reindexing entities of type %s' % \ |
|
113 ', '.join(sorted(str(e) for e in etypes))) |
|
114 # clear fti table first. Use subquery for sql compatibility |
|
115 cnx.system_sql("DELETE FROM %s WHERE EXISTS(SELECT 1 FROM ENTITIES " |
|
116 "WHERE eid=%s AND type IN (%s))" % ( |
|
117 dbhelper.fti_table, dbhelper.fti_uid_attr, |
|
118 ','.join("'%s'" % etype for etype in etypes))) |
|
119 if withpb: |
|
120 pb = ProgressBar(len(etypes) + 1) |
|
121 pb.update() |
|
122 # reindex entities by generating rql queries which set all indexable |
|
123 # attribute to their current value |
|
124 source = repo.system_source |
|
125 for eschema in etypes: |
|
126 etype_class = cnx.vreg['etypes'].etype_class(str(eschema)) |
|
127 for rset in etype_class.cw_fti_index_rql_limit(cnx): |
|
128 source.fti_index_entities(cnx, rset.entities()) |
|
129 # clear entity cache to avoid high memory consumption on big tables |
|
130 cnx.drop_entity_cache() |
|
131 if withpb: |
|
132 pb.update() |
|
133 if withpb: |
|
134 pb.finish() |
|
135 |
|
136 |
|
137 def check_schema(schema, cnx, eids, fix=1): |
|
138 """check serialized schema""" |
|
139 print('Checking serialized schema') |
|
140 rql = ('Any COUNT(X),RN,SN,ON,CTN GROUPBY RN,SN,ON,CTN ORDERBY 1 ' |
|
141 'WHERE X is CWConstraint, R constrained_by X, ' |
|
142 'R relation_type RT, RT name RN, R from_entity ST, ST name SN, ' |
|
143 'R to_entity OT, OT name ON, X cstrtype CT, CT name CTN') |
|
144 for count, rn, sn, on, cstrname in cnx.execute(rql): |
|
145 if count == 1: |
|
146 continue |
|
147 if cstrname in UNIQUE_CONSTRAINTS: |
|
148 print("ERROR: got %s %r constraints on relation %s.%s.%s" % ( |
|
149 count, cstrname, sn, rn, on)) |
|
150 if fix: |
|
151 print('dunno how to fix, do it yourself') |
|
152 |
|
153 |
|
154 |
|
155 def check_text_index(schema, cnx, eids, fix=1): |
|
156 """check all entities registered in the text index""" |
|
157 print('Checking text index') |
|
158 msg = ' Entity with eid %s exists in the text index but in no source (autofix will remove from text index)' |
|
159 cursor = cnx.system_sql('SELECT uid FROM appears;') |
|
160 for row in cursor.fetchall(): |
|
161 eid = row[0] |
|
162 if not has_eid(cnx, cursor, eid, eids): |
|
163 sys.stderr.write(msg % eid) |
|
164 if fix: |
|
165 cnx.system_sql('DELETE FROM appears WHERE uid=%s;' % eid) |
|
166 notify_fixed(fix) |
|
167 |
|
168 |
|
169 def check_entities(schema, cnx, eids, fix=1): |
|
170 """check all entities registered in the repo system table""" |
|
171 print('Checking entities system table') |
|
172 # system table but no source |
|
173 msg = ' Entity %s with eid %s exists in the system table but in no source (autofix will delete the entity)' |
|
174 cursor = cnx.system_sql('SELECT eid,type FROM entities;') |
|
175 for row in cursor.fetchall(): |
|
176 eid, etype = row |
|
177 if not has_eid(cnx, cursor, eid, eids): |
|
178 sys.stderr.write(msg % (etype, eid)) |
|
179 if fix: |
|
180 cnx.system_sql('DELETE FROM entities WHERE eid=%s;' % eid) |
|
181 notify_fixed(fix) |
|
182 # source in entities, but no relation cw_source |
|
183 # XXX this (get_versions) requires a second connection to the db when we already have one open |
|
184 applcwversion = cnx.repo.get_versions().get('cubicweb') |
|
185 if applcwversion >= (3, 13, 1): # entities.asource appeared in 3.13.1 |
|
186 cursor = cnx.system_sql('SELECT e.eid FROM entities as e, cw_CWSource as s ' |
|
187 'WHERE s.cw_name=e.asource AND ' |
|
188 'NOT EXISTS(SELECT 1 FROM cw_source_relation as cs ' |
|
189 ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid) ' |
|
190 'ORDER BY e.eid') |
|
191 msg = (' Entity with eid %s refers to source in entities table, ' |
|
192 'but is missing relation cw_source (autofix will create the relation)\n') |
|
193 for row in cursor.fetchall(): |
|
194 sys.stderr.write(msg % row[0]) |
|
195 if fix: |
|
196 cnx.system_sql('INSERT INTO cw_source_relation (eid_from, eid_to) ' |
|
197 'SELECT e.eid, s.cw_eid FROM entities as e, cw_CWSource as s ' |
|
198 'WHERE s.cw_name=e.asource AND NOT EXISTS(SELECT 1 FROM cw_source_relation as cs ' |
|
199 ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid)') |
|
200 notify_fixed(True) |
|
201 # inconsistencies for 'is' |
|
202 msg = ' %s #%s is missing relation "is" (autofix will create the relation)\n' |
|
203 cursor = cnx.system_sql('SELECT e.type, e.eid FROM entities as e, cw_CWEType as s ' |
|
204 'WHERE s.cw_name=e.type AND NOT EXISTS(SELECT 1 FROM is_relation as cs ' |
|
205 ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid) ' |
|
206 'ORDER BY e.eid') |
|
207 for row in cursor.fetchall(): |
|
208 sys.stderr.write(msg % tuple(row)) |
|
209 if fix: |
|
210 cnx.system_sql('INSERT INTO is_relation (eid_from, eid_to) ' |
|
211 'SELECT e.eid, s.cw_eid FROM entities as e, cw_CWEType as s ' |
|
212 'WHERE s.cw_name=e.type AND NOT EXISTS(SELECT 1 FROM is_relation as cs ' |
|
213 ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid)') |
|
214 notify_fixed(True) |
|
215 # inconsistencies for 'is_instance_of' |
|
216 msg = ' %s #%s is missing relation "is_instance_of" (autofix will create the relation)\n' |
|
217 cursor = cnx.system_sql('SELECT e.type, e.eid FROM entities as e, cw_CWEType as s ' |
|
218 'WHERE s.cw_name=e.type AND NOT EXISTS(SELECT 1 FROM is_instance_of_relation as cs ' |
|
219 ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid) ' |
|
220 'ORDER BY e.eid') |
|
221 for row in cursor.fetchall(): |
|
222 sys.stderr.write(msg % tuple(row)) |
|
223 if fix: |
|
224 cnx.system_sql('INSERT INTO is_instance_of_relation (eid_from, eid_to) ' |
|
225 'SELECT e.eid, s.cw_eid FROM entities as e, cw_CWEType as s ' |
|
226 'WHERE s.cw_name=e.type AND NOT EXISTS(SELECT 1 FROM is_instance_of_relation as cs ' |
|
227 ' WHERE cs.eid_from=e.eid AND cs.eid_to=s.cw_eid)') |
|
228 notify_fixed(True) |
|
229 print('Checking entities tables') |
|
230 msg = ' Entity with eid %s exists in the %s table but not in the system table (autofix will delete the entity)' |
|
231 for eschema in schema.entities(): |
|
232 if eschema.final: |
|
233 continue |
|
234 table = SQL_PREFIX + eschema.type |
|
235 column = SQL_PREFIX + 'eid' |
|
236 cursor = cnx.system_sql('SELECT %s FROM %s;' % (column, table)) |
|
237 for row in cursor.fetchall(): |
|
238 eid = row[0] |
|
239 # eids is full since we have fetched everything from the entities table, |
|
240 # no need to call has_eid |
|
241 if not eid in eids or not eids[eid]: |
|
242 sys.stderr.write(msg % (eid, eschema.type)) |
|
243 if fix: |
|
244 cnx.system_sql('DELETE FROM %s WHERE %s=%s;' % (table, column, eid)) |
|
245 notify_fixed(fix) |
|
246 |
|
247 |
|
248 def bad_related_msg(rtype, target, eid, fix): |
|
249 msg = ' A relation %s with %s eid %s exists but no such entity in sources' |
|
250 sys.stderr.write(msg % (rtype, target, eid)) |
|
251 notify_fixed(fix) |
|
252 |
|
253 def bad_inlined_msg(rtype, parent_eid, eid, fix): |
|
254 msg = (' An inlined relation %s from %s to %s exists but the latter ' |
|
255 'entity does not exist') |
|
256 sys.stderr.write(msg % (rtype, parent_eid, eid)) |
|
257 notify_fixed(fix) |
|
258 |
|
259 |
|
260 def check_relations(schema, cnx, eids, fix=1): |
|
261 """check that eids referenced by relations are registered in the repo system |
|
262 table |
|
263 """ |
|
264 print('Checking relations') |
|
265 for rschema in schema.relations(): |
|
266 if rschema.final or rschema.type in PURE_VIRTUAL_RTYPES: |
|
267 continue |
|
268 if rschema.inlined: |
|
269 for subjtype in rschema.subjects(): |
|
270 table = SQL_PREFIX + str(subjtype) |
|
271 column = SQL_PREFIX + str(rschema) |
|
272 sql = 'SELECT cw_eid,%s FROM %s WHERE %s IS NOT NULL;' % ( |
|
273 column, table, column) |
|
274 cursor = cnx.system_sql(sql) |
|
275 for row in cursor.fetchall(): |
|
276 parent_eid, eid = row |
|
277 if not has_eid(cnx, cursor, eid, eids): |
|
278 bad_inlined_msg(rschema, parent_eid, eid, fix) |
|
279 if fix: |
|
280 sql = 'UPDATE %s SET %s=NULL WHERE %s=%s;' % ( |
|
281 table, column, column, eid) |
|
282 cnx.system_sql(sql) |
|
283 continue |
|
284 try: |
|
285 cursor = cnx.system_sql('SELECT eid_from FROM %s_relation;' % rschema) |
|
286 except Exception as ex: |
|
287 # usually because table doesn't exist |
|
288 print('ERROR', ex) |
|
289 continue |
|
290 for row in cursor.fetchall(): |
|
291 eid = row[0] |
|
292 if not has_eid(cnx, cursor, eid, eids): |
|
293 bad_related_msg(rschema, 'subject', eid, fix) |
|
294 if fix: |
|
295 sql = 'DELETE FROM %s_relation WHERE eid_from=%s;' % ( |
|
296 rschema, eid) |
|
297 cnx.system_sql(sql) |
|
298 cursor = cnx.system_sql('SELECT eid_to FROM %s_relation;' % rschema) |
|
299 for row in cursor.fetchall(): |
|
300 eid = row[0] |
|
301 if not has_eid(cnx, cursor, eid, eids): |
|
302 bad_related_msg(rschema, 'object', eid, fix) |
|
303 if fix: |
|
304 sql = 'DELETE FROM %s_relation WHERE eid_to=%s;' % ( |
|
305 rschema, eid) |
|
306 cnx.system_sql(sql) |
|
307 |
|
308 |
|
309 def check_mandatory_relations(schema, cnx, eids, fix=1): |
|
310 """check entities missing some mandatory relation""" |
|
311 print('Checking mandatory relations') |
|
312 msg = '%s #%s is missing mandatory %s relation %s (autofix will delete the entity)' |
|
313 for rschema in schema.relations(): |
|
314 if rschema.final or rschema in PURE_VIRTUAL_RTYPES or rschema in ('is', 'is_instance_of'): |
|
315 continue |
|
316 smandatory = set() |
|
317 omandatory = set() |
|
318 for rdef in rschema.rdefs.values(): |
|
319 if rdef.cardinality[0] in '1+': |
|
320 smandatory.add(rdef.subject) |
|
321 if rdef.cardinality[1] in '1+': |
|
322 omandatory.add(rdef.object) |
|
323 for role, etypes in (('subject', smandatory), ('object', omandatory)): |
|
324 for etype in etypes: |
|
325 if role == 'subject': |
|
326 rql = 'Any X WHERE NOT X %s Y, X is %s' % (rschema, etype) |
|
327 else: |
|
328 rql = 'Any X WHERE NOT Y %s X, X is %s' % (rschema, etype) |
|
329 for entity in cnx.execute(rql).entities(): |
|
330 sys.stderr.write(msg % (entity.cw_etype, entity.eid, role, rschema)) |
|
331 if fix: |
|
332 #if entity.cw_describe()['source']['uri'] == 'system': XXX |
|
333 entity.cw_delete() # XXX this is BRUTAL! |
|
334 notify_fixed(fix) |
|
335 |
|
336 |
|
337 def check_mandatory_attributes(schema, cnx, eids, fix=1): |
|
338 """check for entities stored in the system source missing some mandatory |
|
339 attribute |
|
340 """ |
|
341 print('Checking mandatory attributes') |
|
342 msg = '%s #%s is missing mandatory attribute %s (autofix will delete the entity)' |
|
343 for rschema in schema.relations(): |
|
344 if not rschema.final or rschema in VIRTUAL_RTYPES: |
|
345 continue |
|
346 for rdef in rschema.rdefs.values(): |
|
347 if rdef.cardinality[0] in '1+': |
|
348 rql = 'Any X WHERE X %s NULL, X is %s, X cw_source S, S name "system"' % ( |
|
349 rschema, rdef.subject) |
|
350 for entity in cnx.execute(rql).entities(): |
|
351 sys.stderr.write(msg % (entity.cw_etype, entity.eid, rschema)) |
|
352 if fix: |
|
353 entity.cw_delete() |
|
354 notify_fixed(fix) |
|
355 |
|
356 |
|
357 def check_metadata(schema, cnx, eids, fix=1): |
|
358 """check entities has required metadata |
|
359 |
|
360 FIXME: rewrite using RQL queries ? |
|
361 """ |
|
362 print('Checking metadata') |
|
363 cursor = cnx.system_sql("SELECT DISTINCT type FROM entities;") |
|
364 eidcolumn = SQL_PREFIX + 'eid' |
|
365 msg = ' %s with eid %s has no %s (autofix will set it to now)' |
|
366 for etype, in cursor.fetchall(): |
|
367 if etype not in cnx.vreg.schema: |
|
368 sys.stderr.write('entities table references unknown type %s\n' % |
|
369 etype) |
|
370 if fix: |
|
371 cnx.system_sql("DELETE FROM entities WHERE type = %(type)s", |
|
372 {'type': etype}) |
|
373 continue |
|
374 table = SQL_PREFIX + etype |
|
375 for rel, default in ( ('creation_date', datetime.utcnow()), |
|
376 ('modification_date', datetime.utcnow()), ): |
|
377 column = SQL_PREFIX + rel |
|
378 cursor = cnx.system_sql("SELECT %s FROM %s WHERE %s is NULL" |
|
379 % (eidcolumn, table, column)) |
|
380 for eid, in cursor.fetchall(): |
|
381 sys.stderr.write(msg % (etype, eid, rel)) |
|
382 if fix: |
|
383 cnx.system_sql("UPDATE %s SET %s=%%(v)s WHERE %s=%s ;" |
|
384 % (table, column, eidcolumn, eid), |
|
385 {'v': default}) |
|
386 notify_fixed(fix) |
|
387 |
|
388 |
|
389 def check(repo, cnx, checks, reindex, fix, withpb=True): |
|
390 """check integrity of instance's repository, |
|
391 using given user and password to locally connect to the repository |
|
392 (no running cubicweb server needed) |
|
393 """ |
|
394 # yo, launch checks |
|
395 if checks: |
|
396 eids_cache = {} |
|
397 with cnx.security_enabled(read=False, write=False): # ensure no read security |
|
398 for check in checks: |
|
399 check_func = globals()['check_%s' % check] |
|
400 check_func(repo.schema, cnx, eids_cache, fix=fix) |
|
401 if fix: |
|
402 cnx.commit() |
|
403 else: |
|
404 print() |
|
405 if not fix: |
|
406 print('WARNING: Diagnostic run, nothing has been corrected') |
|
407 if reindex: |
|
408 cnx.rollback() |
|
409 reindex_entities(repo.schema, cnx, withpb=withpb) |
|
410 cnx.commit() |