[ldapuser2ldapfeed] fix confusing script structure and decode the extid to avoid an UnicodeDecodeError (closes #2413437) stable
author Aurelien Campeas <aurelien.campeas@logilab.fr>
Tue, 10 Jul 2012 15:32:40 +0200
branch stable
changeset 8468 f52bb4226020
parent 8467 ad75430a2dc8
child 8470 a85e107947e6
[ldapuser2ldapfeed] fix confusing script structure and decode the extid to avoid an UnicodeDecodeError (closes #2413437)
misc/scripts/ldapuser2ldapfeed.py
--- a/misc/scripts/ldapuser2ldapfeed.py	Tue Jul 17 11:08:36 2012 +0200
+++ b/misc/scripts/ldapuser2ldapfeed.py	Tue Jul 10 15:32:40 2012 +0200
@@ -3,6 +3,7 @@
 Once this script is run, execute c-c db-check to cleanup relation tables.
 """
 import sys
+from collections import defaultdict
 
 try:
     source_name, = __args__
@@ -33,44 +34,65 @@
 
 print '******************** backport entity content ***************************'
 
-todelete = {}
+todelete = defaultdict(list)  # etype -> entities scheduled for deletion
+extids = set()  # extids already handed to the system source
+duplicates = []  # extids met more than once, reported after the loop
 for entity in rql('Any X WHERE X cw_source S, S eid %(s)s', {'s': source.eid}).entities():
-        etype = entity.__regid__
-        if not source.support_entity(etype):
-            print "source doesn't support %s, delete %s" % (etype, entity.eid)
-        else:
-            try:
-                entity.complete()
-            except Exception:
-                print '%s %s much probably deleted, delete it (extid %s)' % (
-                    etype, entity.eid, entity.cw_metainformation()['extid'])
-            else:
-                print 'get back', etype, entity.eid
-                entity.cw_edited = EditedEntity(entity, **entity.cw_attr_cache)
-                if not entity.creation_date:
-                    entity.cw_edited['creation_date'] = datetime.now()
-                if not entity.modification_date:
-                    entity.cw_edited['modification_date'] = datetime.now()
-                if not entity.upassword:
-                    entity.cw_edited['upassword'] = u''
-                if not entity.cwuri:
-                    entity.cw_edited['cwuri'] = '%s/?dn=%s' % (
-                        source.urls[0], entity.cw_metainformation()['extid'])
-                print entity.cw_edited
-                system_source.add_entity(session, entity)
-                sql("UPDATE entities SET source='system' "
-                    "WHERE eid=%(eid)s", {'eid': entity.eid})
-                continue
-        todelete.setdefault(etype, []).append(entity)
+    etype = entity.__regid__
+    if not source.support_entity(etype):
+        print "source doesn't support %s, delete %s" % (etype, entity.eid)
+        todelete[etype].append(entity)  # etype not handled by the source: schedule for deletion
+        continue
+    try:
+        entity.complete()
+    except Exception:  # complete() failed: the entity is assumed to be gone on the source side
+        print '%s %s much probably deleted, delete it (extid %s)' % (
+            etype, entity.eid, entity.cw_metainformation()['extid'])
+        todelete[etype].append(entity)
+        continue
+    print 'get back', etype, entity.eid
+    entity.cw_edited = EditedEntity(entity, **entity.cw_attr_cache)  # seed edited attributes from the attribute cache
+    if not entity.creation_date:  # the checks below fill in values that are missing or empty
+        entity.cw_edited['creation_date'] = datetime.now()
+    if not entity.modification_date:
+        entity.cw_edited['modification_date'] = datetime.now()
+    if not entity.upassword:
+        entity.cw_edited['upassword'] = u''
+    extid = entity.cw_metainformation()['extid']
+    if not entity.cwuri:
+        entity.cw_edited['cwuri'] = '%s/?dn=%s' % (
+            source.urls[0], extid.decode('utf-8', 'ignore'))  # decode before interpolating (avoids UnicodeDecodeError); undecodable bytes are dropped
+    print entity.cw_edited
+    if extid in extids:  # this extid was already processed: record it and do not insert it twice
+        duplicates.append(extid)
+        continue
+    extids.add(extid)
+    system_source.add_entity(session, entity)
+    sql("UPDATE entities SET source='system' "
+        "WHERE eid=%(eid)s", {'eid': entity.eid})  # mark the entity as owned by the 'system' source
 
 # only cleanup entities table, remaining stuff should be cleaned by a c-c
 # db-check to be run after this script
-for entities in todelete.values():
+if duplicates:  # report extids that were met more than once during the backport
+    print 'found %s duplicate entries' % len(duplicates)
+    from pprint import pprint
+    pprint(duplicates)
+
+print sum(len(ents) for ents in todelete.values()), 'entities will be deleted'  # count entities, not entity types
+for etype, entities in todelete.items():  # items(), not values(): each element must unpack into (etype, entities)
+    print 'deleting', etype, [getattr(e, 'login', e.eid) for e in entities]  # fall back to the eid for entities without a login
     system_source.delete_info_multi(session, entities, source_name)
 
 
+
 source_ent = rql('CWSource S WHERE S eid %(s)s', {'s': source.eid}).get_entity(0, 0)
 source_ent.set_attributes(type=u"ldapfeed", parser=u"ldapfeed")
 
 
-commit()
+if raw_input('Commit ?').strip().lower() in ('y', 'yes'):  # explicit yes only: '' in 'yY' was True, so a bare Enter used to commit
+    print 'committing'
+    commit()
+else:  # any other answer rolls back
+    rollback()
+    print 'rollbacked'
+