cubicweb/misc/scripts/repair_splitbrain_ldapuser_source.py
changeset 11057 0b59724cb3f2
parent 10589 7c23b7de2b8d
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cubicweb/misc/scripts/repair_splitbrain_ldapuser_source.py	Sat Jan 16 13:48:51 2016 +0100
@@ -0,0 +1,109 @@
+"""
+CAUTION: READ THIS CAREFULLY
+
+Sometimes it happens that an ldap source (specifically of the ldapuser
+type) yields "ghost" users. The reasons may vary (server upgrade while some
+instances are still running & syncing with the ldap source, unmanaged
+updates to the upstream ldap, etc.).
+
+This script was written and refined enough times that we are confident
+that it does something reasonable (at least it did for the
+target application).
+
+However you should really REALLY understand what it does before
+deciding to apply it yourself. And then ADAPT it to your needs.
+
+"""
+from __future__ import print_function
+
+import base64
+import sys
+from collections import defaultdict
+
+from cubicweb.server.session import hooks_control
+
+try:
+    source_name, = __args__
+    source = repo.sources_by_uri[source_name]
+except ValueError:
+    print('you should specify the source name as script argument (i.e. after --'
+          ' on the command line)')
+    sys.exit(1)
+except KeyError:
+    print('%s is not an active source' % source_name)
+    sys.exit(1)
+
+# check source is reachable before doing anything
+if not source.get_connection().cnx:
+    print('%s is not reachable. Fix this before running this script' % source_name)
+    sys.exit(1)
+
+def find_dupes():
+    # XXX this retrieves entities from a source name "ldap"
+    #     you will want to adjust
+    rset = sql("SELECT eid, extid FROM entities WHERE source='%s'" % source_name)
+    extid2eids = defaultdict(list)
+    for eid, extid in rset:
+        extid2eids[extid].append(eid)
+    return dict((base64.b64decode(extid).lower(), eids)
+                for extid, eids in extid2eids.items()
+                if len(eids) > 1)
+
+def merge_dupes(dupes, docommit=False):
+    gone_eids = []
+    CWUser = schema['CWUser']
+    for extid, eids in dupes.items():
+        newest = eids.pop() # we merge everything on the newest
+        print('merging ghosts of', extid, 'into', newest)
+        # now we merge pairwise into the newest
+        for old in eids:
+            subst = {'old': old, 'new': newest}
+            print('  merging', old)
+            gone_eids.append(old)
+            for rschema in CWUser.subject_relations():
+                if rschema.final or rschema == 'identity':
+                    continue
+                if CWUser.rdef(rschema, 'subject').composite == 'subject':
+                    # old 'composite' property is wiped ...
+                    # think about email addresses, excel preferences
+                    for eschema in rschema.objects():
+                        rql('DELETE %s X WHERE U %s X, U eid %%(old)s' % (eschema, rschema), subst)
+                else:
+                    # relink the new user to its old relations
+                    rql('SET NU %s X WHERE NU eid %%(new)s, NOT NU %s X, OU %s X, OU eid %%(old)s' %
+                        (rschema, rschema, rschema), subst)
+                    # delete the old relations
+                    rql('DELETE U %s X WHERE U eid %%(old)s' % rschema, subst)
+            # same thing ...
+            for rschema in CWUser.object_relations():
+                if rschema.final or rschema == 'identity':
+                    continue
+                rql('SET X %s NU WHERE NU eid %%(new)s, NOT X %s NU, X %s OU, OU eid %%(old)s' %
+                    (rschema, rschema, rschema), subst)
+                rql('DELETE X %s U WHERE U eid %%(old)s' % rschema, subst)
+    if not docommit:
+        rollback()
+        return
+    commit() # XXX flushing operations is wanted rather than really committing
+    print('clean up entities table')
+    sql('DELETE FROM entities WHERE eid IN (%s)' % (', '.join(str(x) for x in gone_eids)))
+    commit()
+
+def main():
+    dupes = find_dupes()
+    if not dupes:
+        print('No duplicate user')
+        return
+
+    print('Found %s duplicate user instances' % len(dupes))
+
+    while True:
+        print('Fix or dry-run? (f/d)  ... or Ctrl-C to break out')
+        answer = raw_input('> ')
+        if answer.lower() not in 'fd':
+            continue
+        print('Please STOP THE APPLICATION INSTANCES (service or interactive), and press Return when done.')
+        raw_input('<I swear all running instances and workers of the application are stopped>')
+        with hooks_control(session, session.HOOKS_DENY_ALL):
+            merge_dupes(dupes, docommit=answer=='f')
+
+main()