[dataimport] Turn the module into a package
Just `hg mv` dataimport.py and test/unittest_dataimport.py.
"""CAUTION: READ THIS CAREFULLYSometimes it happens that ldap (specifically ldapuser type) sourceyield "ghost" users. The reasons may vary (server upgrade while someinstances are still running & syncing with the ldap source, unmanagedupdates to the upstream ldap, etc.).This script was written and refined enough times that we are confidentin that it does something reasonnable (at least it did for thetarget application).However you should really REALLY understand what it does beforedeciding to apply it for you. And then ADAPT it tou your needs."""importbase64fromcollectionsimportdefaultdictfromcubicweb.server.sessionimporthooks_controltry:source_name,=__args__source=repo.sources_by_uri[source_name]exceptValueError:print('you should specify the source name as script argument (i.e. after --'' on the command line)')sys.exit(1)exceptKeyError:print'%s is not an active source'%source_namesys.exit(1)# check source is reachable before doing anythingifnotsource.get_connection().cnx:print'%s is not reachable. Fix this before running this script'%source_namesys.exit(1)deffind_dupes():# XXX this retrieves entities from a source name "ldap"# you will want to adjustrset=sql("SELECT eid, extid FROM entities WHERE source='%s'"%source_name)extid2eids=defaultdict(list)foreid,extidinrset:extid2eids[extid].append(eid)returndict((base64.b64decode(extid).lower(),eids)forextid,eidsinextid2eids.items()iflen(eids)>1)defmerge_dupes(dupes,docommit=False):gone_eids=[]CWUser=schema['CWUser']forextid,eidsindupes.items():newest=eids.pop()# we merge everything on the newestprint'merging ghosts of',extid,'into',newest# now we merge pairwise into the newestforoldineids:subst={'old':old,'new':newest}print' merging',oldgone_eids.append(old)forrschemainCWUser.subject_relations():ifrschema.finalorrschema=='identity':continueifCWUser.rdef(rschema,'subject').composite=='subject':# old 'composite' property is wiped ...# think about email addresses, excel preferencesforeschemainrschema.objects():rql('DELETE %s X WHERE U %s X, U eid %%(old)s'%(eschema,rschema),subst)else:# relink the new user to its old relationsrql('SET NU %s X WHERE NU eid %%(new)s, NOT NU %s X, OU %s X, OU eid %%(old)s'%(rschema,rschema,rschema),subst)# delete the old relationsrql('DELETE U %s X WHERE U eid %%(old)s'%rschema,subst)# same thing ...forrschemainCWUser.object_relations():ifrschema.finalorrschema=='identity':continuerql('SET X %s NU WHERE NU eid %%(new)s, NOT X %s NU, X %s OU, OU eid %%(old)s'%(rschema,rschema,rschema),subst)rql('DELETE X %s U WHERE U eid %%(old)s'%rschema,subst)ifnotdocommit:rollback()returncommit()# XXX flushing operations is wanted rather than really committingprint'clean up entities table'sql('DELETE FROM entities WHERE eid IN (%s)'%(', '.join(str(x)forxingone_eids)))commit()defmain():dupes=find_dupes()ifnotdupes:print'No duplicate user'returnprint'Found %s duplicate user instances'%len(dupes)whileTrue:print'Fix or dry-run? (f/d) ... or Ctrl-C to break out'answer=raw_input('> ')ifanswer.lower()notin'fd':continueprint'Please STOP THE APPLICATION INSTANCES (service or interactive), and press Return when done.'raw_input('<I swear all running instances and workers of the application are stopped>')withhooks_control(session,session.HOOKS_DENY_ALL):merge_dupes(dupes,docommit=answer=='f')main()