# HG changeset patch
# User Aurelien Campeas
# Date 1349088574 -7200
# Node ID 56d9eb013feb0c2d30b29ca70911b5cfb790cc5c
# Parent  1ac70b1bf00eb2f9e129091dfc9fb01170cdb8e8
[misc/scripts] a slightly experimental script to help repair LDAPUser cwusers suffering from split-brainite (closes #2497108)

diff -r 1ac70b1bf00e -r 56d9eb013feb misc/scripts/repair_splitbrain_ldapuser_source.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/misc/scripts/repair_splitbrain_ldapuser_source.py	Mon Oct 01 12:49:34 2012 +0200
@@ -0,0 +1,109 @@
+"""
+CAUTION: READ THIS CAREFULLY
+
+Sometimes it happens that the ldap (specifically ldapuser type) source
+yields "ghost" users. The reasons may vary (server upgrade while some
+instances are still running & syncing with the ldap source, unmanaged
+updates to the upstream ldap, etc.).
+
+This script was written and refined enough times that we are confident
+that it does something reasonable (at least it did for the
+target application).
+
+However, you should really REALLY understand what it does before
+deciding to apply it to your case. And then ADAPT it to your needs.
+
+"""
+
+import sys
+import base64
+from collections import defaultdict
+
+from cubicweb.server.session import hooks_control
+
+try:
+    source_name, = __args__
+    source = repo.sources_by_uri[source_name]
+except ValueError:
+    print ('you should specify the source name as script argument (i.e. after --'
+           ' on the command line)')
+    sys.exit(1)
+except KeyError:
+    print '%s is not an active source' % source_name
+    sys.exit(1)
+
+# check the source is reachable before doing anything
+if not source.get_connection().cnx:
+    print '%s is not reachable. Fix this before running this script' % source_name
+    sys.exit(1)
+
+def find_dupes():
+    # XXX this fetches the (eid, extid) pairs recorded for the given source
+    # in the entities table; you may want to adjust the query
+    rset = sql("SELECT eid, extid FROM entities WHERE source='%s'" % source_name)
+    extid2eids = defaultdict(list)
+    for eid, extid in rset:
+        extid2eids[extid].append(eid)
+    return dict((base64.b64decode(extid).lower(), eids)
+                for extid, eids in extid2eids.items()
+                if len(eids) > 1)
+
+def merge_dupes(dupes, docommit=False):
+    gone_eids = []
+    CWUser = schema['CWUser']
+    for extid, eids in dupes.items():
+        newest = eids.pop() # we merge everything onto the newest
+        print 'merging ghosts of', extid, 'into', newest
+        # now we merge pairwise into the newest
+        for old in eids:
+            subst = {'old': old, 'new': newest}
+            print ' merging', old
+            gone_eids.append(old)
+            for rschema in CWUser.subject_relations():
+                if rschema.final or rschema == 'identity':
+                    continue
+                if CWUser.rdef(rschema, 'subject').composite == 'subject':
+                    # the old user's 'composite' objects are wiped ...
+                    # think about email addresses, excel preferences
+                    for eschema in rschema.objects():
+                        rql('DELETE %s X WHERE U %s X, U eid %%(old)s' % (eschema, rschema), subst)
+                else:
+                    # relink the new user to its old relations
+                    rql('SET NU %s X WHERE NU eid %%(new)s, NOT NU %s X, OU %s X, OU eid %%(old)s' %
+                        (rschema, rschema, rschema), subst)
+                    # delete the old relations
+                    rql('DELETE U %s X WHERE U eid %%(old)s' % rschema, subst)
+            # same thing, this time for the object relations
+            for rschema in CWUser.object_relations():
+                if rschema.final or rschema == 'identity':
+                    continue
+                rql('SET X %s NU WHERE NU eid %%(new)s, NOT X %s NU, X %s OU, OU eid %%(old)s' %
+                    (rschema, rschema, rschema), subst)
+                rql('DELETE X %s U WHERE U eid %%(old)s' % rschema, subst)
+    if not docommit:
+        rollback()
+        return
+    commit() # XXX flushing operations is wanted rather than really committing
+    print 'clean up entities table'
+    sql('DELETE FROM entities WHERE eid IN (%s)' % (', '.join(str(x) for x in gone_eids)))
+    commit()
+
+def main():
+    dupes = find_dupes()
+    if not dupes:
+        print 'No duplicate user'
+        return
+
+    print 'Found %s duplicate user instances' % len(dupes)
+
+    while True:
+        print 'Fix or dry-run? (f/d) ... or Ctrl-C to break out'
+        answer = raw_input('> ')
+        if answer.lower() not in ('f', 'd'):
+            continue
+        print 'Please STOP THE APPLICATION INSTANCES (service or interactive), and press Return when done.'
+        raw_input('')
+        with hooks_control(session, session.HOOKS_DENY_ALL):
+            merge_dupes(dupes, docommit=answer=='f')
+
+main()
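For reference, a sketch of how such a repair script is typically run: it is fed to a cubicweb-ctl shell, with the ldapuser source name passed after `--` on the command line, as the script's own error message explains. The instance name `myinstance` and the source name `ldap` below are placeholders for your setup, not values taken from this changeset:

    cubicweb-ctl shell myinstance misc/scripts/repair_splitbrain_ldapuser_source.py -- ldap

As the script itself insists, stop all application instances before answering 'f' to actually apply the merge; the 'd' answer performs a dry run that is rolled back.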