cubicweb/devtools/fix_po_encoding
author Denis Laxalde <denis.laxalde@logilab.fr>
Wed, 26 Oct 2016 08:34:34 +0200
changeset 11751 b57b76091481
parent 11057 0b59724cb3f2
child 12779 44147dab9d27
permissions -rwxr-xr-x
[devtools] Make dependency on backports.tempfile (Python2) optional. The package may not be available on all systems (e.g. no Debian package exists at the moment), and we should not crash with ImportError when importing testlib from client code. Follow-up on a6dc650bc230, where the dependency was introduced.

#!/usr/bin/python

"""usage: fix_po_encoding [filename...]
Change the encoding of the po files passed as arguments to UTF-8.
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    """Re-encode the po file `filename` in place to the `target` encoding.

    The current encoding is the charset declared in the file's
    ``Content-Type`` header.  When that charset already equals `target`
    the file is left untouched; otherwise the header is rewritten to
    declare `target` and the whole content is transcoded.

    Raises ValueError (from find_encoding) when no charset is declared.
    """
    # Read raw bytes: decoding is then explicit on both Python 2 and 3,
    # and the `with` block closes the handle even if reading fails.
    with open(filename, 'rb') as fdesc:
        raw = fdesc.read()
    # latin-1 maps every byte to a code point, so this decode cannot fail;
    # it is only used to sniff the header declaration.
    encoding = find_encoding(raw.decode('latin-1'))
    if encoding == target:
        return
    # Decode fully before opening the file for writing, so a decoding error
    # cannot leave a truncated file behind.
    text = fix_encoding(raw.decode(encoding), target)
    with codecs.open(filename, 'wb', encoding=target) as fdesc:
        fdesc.write(text)

def find_encoding(data):
    """Return the charset declared in the ``Content-Type`` header of `data`.

    `data` is the text of a po file.  The header is matched as it is
    written in the file: a double-quoted line whose content ends with a
    literal backslash-n escape, e.g.
    ``"Content-Type: text/plain; charset=iso-8859-1\\n"``.

    Raises ValueError when no such declaration is found.
    """
    # Charset labels may contain '_', '.', ':' and '+' in addition to
    # alphanumerics and '-' (e.g. ISO_8859-15); accept those too so that
    # such files are not reported as lacking a declaration.
    regexp = re.compile(
        r'"Content-Type:.* charset=([a-zA-Z0-9._:+-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    """Return `data` with its declared charset replaced by `target_encoding`.

    Every ``"Content-Type: ... charset=XXX\\n"`` header line found in
    `data` has its charset value substituted; text without such a line is
    returned unchanged.  Only the declaration is edited, the content itself
    is not transcoded.
    """
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)

    def declare_target(mo):
        # Build the replacement by hand instead of through a template
        # string: a template such as r'\1' + '1252' would be parsed as a
        # reference to another group, and backslashes in the encoding name
        # would be interpreted as escapes.
        return mo.group(1) + target_encoding + mo.group(3)

    return regexp.sub(declare_target, data)
    


# Only process the command line when run as a script, so that importing
# this module does not rewrite files as a side effect.
if __name__ == '__main__':
    for filename in sys.argv[1:]:
        # Echo each file name before converting it; the single-argument
        # call form prints identically on Python 2 and Python 3.
        print(filename)
        change_encoding(filename)