misc/scripts/repair_splitbrain_ldapuser_source.py
branchstable
changeset 8568 56d9eb013feb
child 10589 7c23b7de2b8d
equal deleted inserted replaced
8567:1ac70b1bf00e 8568:56d9eb013feb
       
     1 """
       
     2 CAUTION: READ THIS CAREFULLY
       
     3 
       
     4 Sometimes it happens that ldap (specifically ldapuser type) sources
       
     5 yield "ghost" users. The reasons may vary (server upgrade while some
       
     6 instances are still running & syncing with the ldap source, unmanaged
       
     7 updates to the upstream ldap, etc.).
       
     8 
       
     9 This script was written and refined enough times that we are confident
       
    10 that it does something reasonable (at least it did for the
       
    11 target application).
       
    12 
       
    13 However you should really REALLY understand what it does before
       
    14 deciding to apply it for you. And then ADAPT it to your needs.
       
    15 
       
    16 """
       
    17 
       
import base64
import sys
from collections import defaultdict

from cubicweb.server.session import hooks_control
       
    22 
       
    23 try:
       
    24     source_name, = __args__
       
    25     source = repo.sources_by_uri[source_name]
       
    26 except ValueError:
       
    27     print('you should specify the source name as script argument (i.e. after --'
       
    28           ' on the command line)')
       
    29     sys.exit(1)
       
    30 except KeyError:
       
    31     print '%s is not an active source' % source_name
       
    32     sys.exit(1)
       
    33 
       
    34 # check source is reachable before doing anything
       
    35 if not source.get_connection().cnx:
       
    36     print '%s is not reachable. Fix this before running this script' % source_name
       
    37     sys.exit(1)
       
    38 
       
    39 def find_dupes():
       
    40     # XXX this retrieves entities from a source name "ldap"
       
    41     #     you will want to adjust
       
    42     rset = sql("SELECT eid, extid FROM entities WHERE source='%s'" % source_name)
       
    43     extid2eids = defaultdict(list)
       
    44     for eid, extid in rset:
       
    45         extid2eids[extid].append(eid)
       
    46     return dict((base64.b64decode(extid).lower(), eids)
       
    47                 for extid, eids in extid2eids.items()
       
    48                 if len(eids) > 1)
       
    49 
       
    50 def merge_dupes(dupes, docommit=False):
       
    51     gone_eids = []
       
    52     CWUser = schema['CWUser']
       
    53     for extid, eids in dupes.items():
       
    54         newest = eids.pop() # we merge everything on the newest
       
    55         print 'merging ghosts of', extid, 'into', newest
       
    56         # now we merge pairwise into the newest
       
    57         for old in eids:
       
    58             subst = {'old': old, 'new': newest}
       
    59             print '  merging', old
       
    60             gone_eids.append(old)
       
    61             for rschema in CWUser.subject_relations():
       
    62                 if rschema.final or rschema == 'identity':
       
    63                     continue
       
    64                 if CWUser.rdef(rschema, 'subject').composite == 'subject':
       
    65                     # old 'composite' property is wiped ...
       
    66                     # think about email addresses, excel preferences
       
    67                     for eschema in rschema.objects():
       
    68                         rql('DELETE %s X WHERE U %s X, U eid %%(old)s' % (eschema, rschema), subst)
       
    69                 else:
       
    70                     # relink the new user to its old relations
       
    71                     rql('SET NU %s X WHERE NU eid %%(new)s, NOT NU %s X, OU %s X, OU eid %%(old)s' %
       
    72                         (rschema, rschema, rschema), subst)
       
    73                     # delete the old relations
       
    74                     rql('DELETE U %s X WHERE U eid %%(old)s' % rschema, subst)
       
    75             # same thing ...
       
    76             for rschema in CWUser.object_relations():
       
    77                 if rschema.final or rschema == 'identity':
       
    78                     continue
       
    79                 rql('SET X %s NU WHERE NU eid %%(new)s, NOT X %s NU, X %s OU, OU eid %%(old)s' %
       
    80                     (rschema, rschema, rschema), subst)
       
    81                 rql('DELETE X %s U WHERE U eid %%(old)s' % rschema, subst)
       
    82     if not docommit:
       
    83         rollback()
       
    84         return
       
    85     commit() # XXX flushing operations is wanted rather than really committing
       
    86     print 'clean up entities table'
       
    87     sql('DELETE FROM entities WHERE eid IN (%s)' % (', '.join(str(x) for x in gone_eids)))
       
    88     commit()
       
    89 
       
    90 def main():
       
    91     dupes = find_dupes()
       
    92     if not dupes:
       
    93         print 'No duplicate user'
       
    94         return
       
    95 
       
    96     print 'Found %s duplicate user instances' % len(dupes)
       
    97 
       
    98     while True:
       
    99         print 'Fix or dry-run? (f/d)  ... or Ctrl-C to break out'
       
   100         answer = raw_input('> ')
       
   101         if answer.lower() not in 'fd':
       
   102             continue
       
   103         print 'Please STOP THE APPLICATION INSTANCES (service or interactive), and press Return when done.'
       
   104         raw_input('<I swear all running instances and workers of the application are stopped>')
       
   105         with hooks_control(session, session.HOOKS_DENY_ALL):
       
   106             merge_dupes(dupes, docommit=answer=='f')
       
   107 
       
   108 main()