1 """ |
|
2 CAUTION: READ THIS CAREFULLY |
|
3 |
|
Sometimes it happens that an ldap source (specifically of the ldapuser type)
yields "ghost" users. The reasons may vary (server upgrade while some
instances are still running & syncing with the ldap source, unmanaged
updates to the upstream ldap, etc.).
|
8 |
|
This script was written and refined enough times that we are confident
that it does something reasonable (at least it did for the
target application).
|
12 |
|
However you should really REALLY understand what it does before
deciding to apply it to your own instance. And then ADAPT it to your needs.
|
15 |
|
16 """ |
|
17 from __future__ import print_function |
|
18 |
|
19 import base64 |
|
20 from collections import defaultdict |
|
21 |
|
22 from cubicweb.server.session import hooks_control |
|
23 |
|
# ``sys`` is not imported anywhere else in this script, yet every bail-out
# path below calls ``sys.exit``; import it here so those paths terminate with
# exit status 1 instead of dying on a NameError.
import sys

# ``__args__`` and ``repo`` are not defined in this file; they are expected to
# be injected into the script namespace by whatever executes it (presumably
# ``cubicweb-ctl shell`` -- TODO confirm).
try:
    # exactly one script argument is expected: the name of the source
    source_name, = __args__
    source = repo.sources_by_uri[source_name]
except ValueError:
    # raised by the unpacking above when zero or several arguments were given
    print('you should specify the source name as script argument (i.e. after --'
          ' on the command line)')
    sys.exit(1)
except KeyError:
    # the unpacking succeeded, so ``source_name`` is bound at this point
    print('%s is not an active source' % source_name)
    sys.exit(1)

# check source is reachable before doing anything
if not source.get_connection().cnx:
    print('%s is not reachable. Fix this before running this script' % source_name)
    sys.exit(1)
|
39 |
|
def find_dupes():
    """Return the external ids of ``source_name`` owning several entities.

    Every ``(eid, extid)`` row of the ``entities`` table attached to the
    source is grouped by ``extid``; only the groups holding more than one
    eid are reported.

    :return: a dict mapping the base64-decoded, lower-cased extid to the list
        of eids sharing it, in the order the rows were returned.
    """
    # the source name is the one given as script argument
    query = "SELECT eid, extid FROM entities WHERE source='%s'" % source_name
    eids_by_extid = defaultdict(list)
    for row_eid, row_extid in sql(query):
        eids_by_extid[row_extid].append(row_eid)

    duplicates = {}
    for raw_extid, owners in eids_by_extid.items():
        if len(owners) < 2:
            # a single entity for this extid: nothing to merge
            continue
        duplicates[base64.b64decode(raw_extid).lower()] = owners
    return duplicates
|
50 |
|
def merge_dupes(dupes, docommit=False):
    """Merge each group of duplicated users onto its most recent entity.

    For every group, the entity with the highest eid is kept and each other
    entity (a "ghost") is processed as follows:

    * for subject relations that are composite on the subject side, the
      related entities of the ghost are deleted;
    * for every other non-final relation (``identity`` excepted), the relation
      is set on the kept user where missing, then removed from the ghost.

    :param dupes: dict mapping an extid to the list of eids sharing it. The
        mapping and its lists are left untouched, so the very same object can
        be passed again (e.g. a dry-run followed by the real fix).
    :param docommit: when false (the default) everything is rolled back once
        the merge has been attempted (dry-run); when true the changes are
        committed and the ghosts' rows are removed from the ``entities`` table.
    """
    gone_eids = []
    CWUser = schema['CWUser']
    for extid, eids in dupes.items():
        if not eids:
            continue
        # Work on a sorted copy: popping from ``eids`` would corrupt the
        # caller's data between two runs, and the rows come from a query
        # without ORDER BY so list order cannot be trusted. Eids are assumed
        # to be allocated in increasing order, hence highest == newest.
        ordered = sorted(eids)
        newest, ghosts = ordered[-1], ordered[:-1]
        print('merging ghosts of', extid, 'into', newest)
        # now we merge pairwise into the newest
        for old in ghosts:
            subst = {'old': old, 'new': newest}
            print('  merging', old)
            gone_eids.append(old)
            for rschema in CWUser.subject_relations():
                if rschema.final or rschema == 'identity':
                    continue
                if CWUser.rdef(rschema, 'subject').composite == 'subject':
                    # old 'composite' property is wiped ...
                    # think about email addresses, excel preferences
                    for eschema in rschema.objects():
                        rql('DELETE %s X WHERE U %s X, U eid %%(old)s'
                            % (eschema, rschema), subst)
                else:
                    # relink the new user to its old relations
                    rql('SET NU %s X WHERE NU eid %%(new)s, NOT NU %s X, OU %s X, OU eid %%(old)s' %
                        (rschema, rschema, rschema), subst)
                    # delete the old relations
                    rql('DELETE U %s X WHERE U eid %%(old)s' % rschema, subst)
            # same thing for the relations where the user is the object
            for rschema in CWUser.object_relations():
                if rschema.final or rschema == 'identity':
                    continue
                rql('SET X %s NU WHERE NU eid %%(new)s, NOT X %s NU, X %s OU, OU eid %%(old)s' %
                    (rschema, rschema, rschema), subst)
                rql('DELETE X %s U WHERE U eid %%(old)s' % rschema, subst)
    if not docommit:
        # dry-run: undo everything that was attempted above
        rollback()
        return
    commit()  # XXX flushing operations is wanted rather than really committing
    if not gone_eids:
        # nothing was merged; an empty ``IN ()`` list would be invalid SQL
        return
    print('clean up entities table')
    sql('DELETE FROM entities WHERE eid IN (%s)' % ', '.join(str(x) for x in gone_eids))
    commit()
|
90 |
|
def main():
    """Look for duplicated users and interactively merge them.

    Returns right away when ``find_dupes`` reports nothing. Otherwise the
    operator is asked, in a loop, to choose between a real fix (``f``) and a
    dry-run (``d``); any other answer simply repeats the question. The loop
    only ends on Ctrl-C, as announced by the prompt.
    """
    # ``raw_input`` only exists on Python 2; fall back to ``input`` elsewhere
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    dupes = find_dupes()
    if not dupes:
        print('No duplicate user')
        return

    print('Found %s duplicate user instances' % len(dupes))

    while True:
        print('Fix or dry-run? (f/d) ... or Ctrl-C to break out')
        # Normalise once so that 'F' really means "fix". Compare against the
        # two valid answers explicitly: a substring test on 'fd' would also
        # accept an empty answer (and 'fd') and silently run a dry-run.
        answer = read_line('> ').strip().lower()
        if answer not in ('f', 'd'):
            continue
        print('Please STOP THE APPLICATION INSTANCES (service or interactive), and press Return when done.')
        read_line('<I swear all running instances and workers of the application are stopped>')
        # the merge runs under HOOKS_DENY_ALL hook control
        with hooks_control(session, session.HOOKS_DENY_ALL):
            merge_dupes(dupes, docommit=(answer == 'f'))


main()
|