devtools/fix_po_encoding
author Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
Thu, 12 Nov 2009 18:27:59 +0100
branchstable
changeset 3834 e3e64352063d
parent 0 b97547f5f1fa
permissions -rwxr-xr-x
[javascript] fid form / onfailure behaviour If onfailure is specified, it should be done _before_ any default / standard CW action is done. The callback signature should be the same as the onsuccess one. If the onfailure callback returns true, the default actions will still take place afterwards, otherwise (i.e. return false), the processing stops directly after the callback.

#!/usr/bin/python

"""usage: fix-po-encodings [filename...]
change the encoding of the po files passed as arguments to utf-8
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    fdesc = open(filename)
    data = fdesc.read()
    fdesc.close()
    encoding = find_encoding(data)
    if encoding == target:
        return
    data = fix_encoding(data, target)
    data = unicode(data, encoding)
    fdesc = codecs.open(filename, 'wb', encoding=target)
    fdesc.write(data)
    fdesc.close()

def find_encoding(data):
    regexp = re.compile(r'"Content-Type:.* charset=([a-zA-Z0-9-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)
    return regexp.sub(r'\1%s\3' % target_encoding, data)
    


for filename in sys.argv[1:]:
    print filename
    change_encoding(filename)