diff -r 058bb3dc685f -r 0b59724cb3f2 misc/scripts/repair_splitbrain_ldapuser_source.py --- a/misc/scripts/repair_splitbrain_ldapuser_source.py Mon Jan 04 18:40:30 2016 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,109 +0,0 @@ -""" -CAUTION: READ THIS CAREFULLY - -Sometimes it happens that ldap (specifically ldapuser type) source -yield "ghost" users. The reasons may vary (server upgrade while some -instances are still running & syncing with the ldap source, unmanaged -updates to the upstream ldap, etc.). - -This script was written and refined enough times that we are confident -in that it does something reasonnable (at least it did for the -target application). - -However you should really REALLY understand what it does before -deciding to apply it for you. And then ADAPT it tou your needs. - -""" -from __future__ import print_function - -import base64 -from collections import defaultdict - -from cubicweb.server.session import hooks_control - -try: - source_name, = __args__ - source = repo.sources_by_uri[source_name] -except ValueError: - print('you should specify the source name as script argument (i.e. after --' - ' on the command line)') - sys.exit(1) -except KeyError: - print('%s is not an active source' % source_name) - sys.exit(1) - -# check source is reachable before doing anything -if not source.get_connection().cnx: - print('%s is not reachable. Fix this before running this script' % source_name) - sys.exit(1) - -def find_dupes(): - # XXX this retrieves entities from a source name "ldap" - # you will want to adjust - rset = sql("SELECT eid, extid FROM entities WHERE source='%s'" % source_name) - extid2eids = defaultdict(list) - for eid, extid in rset: - extid2eids[extid].append(eid) - return dict((base64.b64decode(extid).lower(), eids) - for extid, eids in extid2eids.items() - if len(eids) > 1) - -def merge_dupes(dupes, docommit=False): - gone_eids = [] - CWUser = schema['CWUser'] - for extid, eids in dupes.items(): - newest = eids.pop() # we merge everything on the newest - print('merging ghosts of', extid, 'into', newest) - # now we merge pairwise into the newest - for old in eids: - subst = {'old': old, 'new': newest} - print(' merging', old) - gone_eids.append(old) - for rschema in CWUser.subject_relations(): - if rschema.final or rschema == 'identity': - continue - if CWUser.rdef(rschema, 'subject').composite == 'subject': - # old 'composite' property is wiped ... - # think about email addresses, excel preferences - for eschema in rschema.objects(): - rql('DELETE %s X WHERE U %s X, U eid %%(old)s' % (eschema, rschema), subst) - else: - # relink the new user to its old relations - rql('SET NU %s X WHERE NU eid %%(new)s, NOT NU %s X, OU %s X, OU eid %%(old)s' % - (rschema, rschema, rschema), subst) - # delete the old relations - rql('DELETE U %s X WHERE U eid %%(old)s' % rschema, subst) - # same thing ... - for rschema in CWUser.object_relations(): - if rschema.final or rschema == 'identity': - continue - rql('SET X %s NU WHERE NU eid %%(new)s, NOT X %s NU, X %s OU, OU eid %%(old)s' % - (rschema, rschema, rschema), subst) - rql('DELETE X %s U WHERE U eid %%(old)s' % rschema, subst) - if not docommit: - rollback() - return - commit() # XXX flushing operations is wanted rather than really committing - print('clean up entities table') - sql('DELETE FROM entities WHERE eid IN (%s)' % (', '.join(str(x) for x in gone_eids))) - commit() - -def main(): - dupes = find_dupes() - if not dupes: - print('No duplicate user') - return - - print('Found %s duplicate user instances' % len(dupes)) - - while True: - print('Fix or dry-run? (f/d) ... or Ctrl-C to break out') - answer = raw_input('> ') - if answer.lower() not in 'fd': - continue - print('Please STOP THE APPLICATION INSTANCES (service or interactive), and press Return when done.') - raw_input('') - with hooks_control(session, session.HOOKS_DENY_ALL): - merge_dupes(dupes, docommit=answer=='f') - -main()