cubicweb/devtools/fix_po_encoding
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Thu, 29 Sep 2016 23:11:38 +0200
changeset 11760 efb8250e37fb
parent 11057 0b59724cb3f2
child 12779 44147dab9d27
permissions -rwxr-xr-x
Drop deprecated LDAP related script both are relying on the 'entities.source' column which has been dropped in 3.19. They have been written with the old ldapsource in mind, which has been dropped at that time.

#!/usr/bin/python

"""usage: fix-po-encodings [filename...]
change the encoding of the po files passed as arguments to utf-8
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    fdesc = open(filename)
    data = fdesc.read()
    fdesc.close()
    encoding = find_encoding(data)
    if encoding == target:
        return
    data = fix_encoding(data, target)
    data = unicode(data, encoding)
    fdesc = codecs.open(filename, 'wb', encoding=target)
    fdesc.write(data)
    fdesc.close()

def find_encoding(data):
    regexp = re.compile(r'"Content-Type:.* charset=([a-zA-Z0-9-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)
    return regexp.sub(r'\1%s\3' % target_encoding, data)
    


for filename in sys.argv[1:]:
    print filename
    change_encoding(filename)