|
1 """ |
|
2 CAUTION: READ THIS CAREFULLY |
|
3 |
|
4 Sometimes it happens that ldap (specifically ldapuser type) sources |
|
5 yield "ghost" users. The reasons may vary (server upgrade while some |
|
6 instances are still running & syncing with the ldap source, unmanaged |
|
7 updates to the upstream ldap, etc.). |
|
8 |
|
9 This script was written and refined enough times that we are confident |
|
10 that it does something reasonable (at least it did for the |
|
11 target application). |
|
12 |
|
13 However you should really REALLY understand what it does before |
|
14 deciding to apply it for you. And then ADAPT it to your needs. |
|
15 |
|
16 """ |
|
17 |
|
18 import base64 |
|
19 from collections import defaultdict |
|
20 |
|
21 from cubicweb.server.session import hooks_control |
|
22 |
|
import sys  # needed by the sys.exit() calls below

# The name of the source to repair is expected as the single script
# argument; `__args__` and `repo` are not defined in this file and are
# expected to be provided by the environment running the script.
try:
    source_name, = __args__
except ValueError:
    # raised by the unpacking when there is not exactly one argument
    print('you should specify the source name as script argument (i.e. after --'
          ' on the command line)')
    sys.exit(1)

try:
    source = repo.sources_by_uri[source_name]
except KeyError:
    print('%s is not an active source' % source_name)
    sys.exit(1)

# check source is reachable before doing anything
if not source.get_connection().cnx:
    print('%s is not reachable. Fix this before running this script' % source_name)
    sys.exit(1)
|
38 |
|
def find_dupes():
    """Return the external ids of the source that are shared by several entities.

    Reads every (eid, extid) row of the ``entities`` table whose source is
    ``source_name`` and groups the eids by extid.

    :return: dict mapping the base64-decoded, lower-cased extid to the list
        of eids recorded for it, sorted in increasing order; only extids
        with more than one eid are kept.
    """
    rset = sql("SELECT eid, extid FROM entities WHERE source='%s'" % source_name)
    extid2eids = defaultdict(list)
    for eid, extid in rset:
        extid2eids[extid].append(eid)
    # NOTE(review): grouping is done on the raw extid while the returned key
    # is lower-cased. Extids differing only by case are therefore not seen as
    # duplicates of each other, and two groups whose keys collide after
    # lower-casing would overwrite one another below -- confirm this is the
    # intended behaviour.
    #
    # The query has no ORDER BY, so row order is unspecified: sort each list
    # so that a consumer taking the last eid as "the newest" gets a
    # deterministic answer.
    return dict((base64.b64decode(extid).lower(), sorted(eids))
                for extid, eids in extid2eids.items()
                if len(eids) > 1)
|
49 |
|
def merge_dupes(dupes, docommit=False):
    """Merge every group of duplicated users onto a single entity.

    For each group, the entity with the highest eid is kept and every other
    ("old") entity has its relations moved onto it:

    * entities reached through a relation for which CWUser is the composite
      subject are deleted;
    * other subject and object relations are re-created on the kept entity
      (unless already there) and then removed from the old one.

    :param dupes: dict mapping an extid to the list of eids sharing it.
        The dict and its lists are left untouched, so the same value can be
        used for a dry run and then for a real run.
    :param docommit: when false (the default) all changes are rolled back and
        the function returns (dry run). When true, changes are committed and
        the rows of the old eids are removed from the ``entities`` table.
    """
    gone_eids = []
    CWUser = schema['CWUser']
    for extid, eids in dupes.items():
        # Work on a sorted copy: the caller's list must not be consumed, and
        # "newest" must not depend on the order the rows happened to come in.
        # Assumes the highest eid is the most recently created entity.
        ordered = sorted(eids)
        newest = ordered[-1]  # we merge everything on the newest
        print('merging ghosts of %s into %s' % (extid, newest))
        # now we merge pairwise into the newest
        for old in ordered[:-1]:
            subst = {'old': old, 'new': newest}
            print(' merging %s' % old)
            gone_eids.append(old)
            for rschema in CWUser.subject_relations():
                if rschema.final or rschema == 'identity':
                    continue
                if CWUser.rdef(rschema, 'subject').composite == 'subject':
                    # old 'composite' property is wiped ...
                    # think about email addresses, excel preferences
                    for eschema in rschema.objects():
                        rql('DELETE %s X WHERE U %s X, U eid %%(old)s' % (eschema, rschema), subst)
                else:
                    # relink the new user to its old relations
                    rql('SET NU %s X WHERE NU eid %%(new)s, NOT NU %s X, OU %s X, OU eid %%(old)s' %
                        (rschema, rschema, rschema), subst)
                    # delete the old relations
                    rql('DELETE U %s X WHERE U eid %%(old)s' % rschema, subst)
            # same thing for the relations where the user is the object
            for rschema in CWUser.object_relations():
                if rschema.final or rschema == 'identity':
                    continue
                rql('SET X %s NU WHERE NU eid %%(new)s, NOT X %s NU, X %s OU, OU eid %%(old)s' %
                    (rschema, rschema, rschema), subst)
                rql('DELETE X %s U WHERE U eid %%(old)s' % rschema, subst)
    if not docommit:
        rollback()
        return
    commit()  # XXX flushing operations is wanted rather than really committing
    if gone_eids:
        # an empty list would produce an invalid "IN ()" clause
        print('clean up entities table')
        sql('DELETE FROM entities WHERE eid IN (%s)' % (', '.join(str(x) for x in gone_eids)))
        commit()
|
89 |
|
def main():
    """Interactively run the duplicate-user merge, as a dry run or for real.

    Looks for duplicates first and returns at once when there are none.
    Otherwise the operator is asked to choose between a fix (``f``) and a
    dry run (``d``); any other input asks again. A dry run can be repeated
    or followed by a fix; the function returns once a fix has been
    committed. Ctrl-C aborts at any prompt.
    """
    dupes = find_dupes()
    if not dupes:
        print('No duplicate user')
        return

    print('Found %s duplicate user instances' % len(dupes))

    while True:
        print('Fix or dry-run? (f/d) ... or Ctrl-C to break out')
        answer = raw_input('> ').strip().lower()
        # Compare against the exact choices: a substring test on 'fd' would
        # also accept an empty answer (plain Return) or 'fd'.
        if answer not in ('f', 'd'):
            continue
        docommit = answer == 'f'
        print('Please STOP THE APPLICATION INSTANCES (service or interactive), and press Return when done.')
        raw_input('<I swear all running instances and workers of the application are stopped>')
        # Hand over a copy so that a dry run can never alter the data used by
        # a subsequent real run.
        todo = dict((extid, list(eids)) for extid, eids in dupes.items())
        with hooks_control(session, session.HOOKS_DENY_ALL):
            merge_dupes(todo, docommit=docommit)
        if docommit:
            # the duplicates are gone: asking again would replay stale data
            return


main()