cubicweb/misc/scripts/repair_splitbrain_ldapuser_source.py
changeset 11057 0b59724cb3f2
parent 10589 7c23b7de2b8d
equal deleted inserted replaced
11052:058bb3dc685f 11057:0b59724cb3f2
       
     1 """
       
     2 CAUTION: READ THIS CAREFULLY
       
     3 
       
     4 Sometimes it happens that ldap (specifically ldapuser type) source
       
     5 yield "ghost" users. The reasons may vary (server upgrade while some
       
     6 instances are still running & syncing with the ldap source, unmanaged
       
     7 updates to the upstream ldap, etc.).
       
     8 
       
     9 This script was written and refined enough times that we are confident
       
    10 that it does something reasonable (at least it did for the
       
    11 target application).
       
    12 
       
    13 However you should really REALLY understand what it does before
       
    14 deciding to apply it yourself. And then ADAPT it to your needs.
       
    15 
       
    16 """
       
    17 from __future__ import print_function
       
    18 
       
    19 import base64
       
    20 from collections import defaultdict
       
    21 
       
    22 from cubicweb.server.session import hooks_control
       
    23 
       
    24 try:
       
    25     source_name, = __args__
       
    26     source = repo.sources_by_uri[source_name]
       
    27 except ValueError:
       
    28     print('you should specify the source name as script argument (i.e. after --'
       
    29           ' on the command line)')
       
    30     sys.exit(1)
       
    31 except KeyError:
       
    32     print('%s is not an active source' % source_name)
       
    33     sys.exit(1)
       
    34 
       
    35 # check source is reachable before doing anything
       
    36 if not source.get_connection().cnx:
       
    37     print('%s is not reachable. Fix this before running this script' % source_name)
       
    38     sys.exit(1)
       
    39 
       
    40 def find_dupes():
       
    41     # XXX this retrieves entities from a source name "ldap"
       
    42     #     you will want to adjust
       
    43     rset = sql("SELECT eid, extid FROM entities WHERE source='%s'" % source_name)
       
    44     extid2eids = defaultdict(list)
       
    45     for eid, extid in rset:
       
    46         extid2eids[extid].append(eid)
       
    47     return dict((base64.b64decode(extid).lower(), eids)
       
    48                 for extid, eids in extid2eids.items()
       
    49                 if len(eids) > 1)
       
    50 
       
    51 def merge_dupes(dupes, docommit=False):
       
    52     gone_eids = []
       
    53     CWUser = schema['CWUser']
       
    54     for extid, eids in dupes.items():
       
    55         newest = eids.pop() # we merge everything on the newest
       
    56         print('merging ghosts of', extid, 'into', newest)
       
    57         # now we merge pairwise into the newest
       
    58         for old in eids:
       
    59             subst = {'old': old, 'new': newest}
       
    60             print('  merging', old)
       
    61             gone_eids.append(old)
       
    62             for rschema in CWUser.subject_relations():
       
    63                 if rschema.final or rschema == 'identity':
       
    64                     continue
       
    65                 if CWUser.rdef(rschema, 'subject').composite == 'subject':
       
    66                     # old 'composite' property is wiped ...
       
    67                     # think about email addresses, excel preferences
       
    68                     for eschema in rschema.objects():
       
    69                         rql('DELETE %s X WHERE U %s X, U eid %%(old)s' % (eschema, rschema), subst)
       
    70                 else:
       
    71                     # relink the new user to its old relations
       
    72                     rql('SET NU %s X WHERE NU eid %%(new)s, NOT NU %s X, OU %s X, OU eid %%(old)s' %
       
    73                         (rschema, rschema, rschema), subst)
       
    74                     # delete the old relations
       
    75                     rql('DELETE U %s X WHERE U eid %%(old)s' % rschema, subst)
       
    76             # same thing ...
       
    77             for rschema in CWUser.object_relations():
       
    78                 if rschema.final or rschema == 'identity':
       
    79                     continue
       
    80                 rql('SET X %s NU WHERE NU eid %%(new)s, NOT X %s NU, X %s OU, OU eid %%(old)s' %
       
    81                     (rschema, rschema, rschema), subst)
       
    82                 rql('DELETE X %s U WHERE U eid %%(old)s' % rschema, subst)
       
    83     if not docommit:
       
    84         rollback()
       
    85         return
       
    86     commit() # XXX flushing operations is wanted rather than really committing
       
    87     print('clean up entities table')
       
    88     sql('DELETE FROM entities WHERE eid IN (%s)' % (', '.join(str(x) for x in gone_eids)))
       
    89     commit()
       
    90 
       
    91 def main():
       
    92     dupes = find_dupes()
       
    93     if not dupes:
       
    94         print('No duplicate user')
       
    95         return
       
    96 
       
    97     print('Found %s duplicate user instances' % len(dupes))
       
    98 
       
    99     while True:
       
   100         print('Fix or dry-run? (f/d)  ... or Ctrl-C to break out')
       
   101         answer = raw_input('> ')
       
   102         if answer.lower() not in 'fd':
       
   103             continue
       
   104         print('Please STOP THE APPLICATION INSTANCES (service or interactive), and press Return when done.')
       
   105         raw_input('<I swear all running instances and workers of the application are stopped>')
       
   106         with hooks_control(session, session.HOOKS_DENY_ALL):
       
   107             merge_dupes(dupes, docommit=answer=='f')
       
   108 
       
   109 main()