devtools/fix_po_encoding
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Mon, 10 Sep 2012 14:00:09 +0200
changeset 8537 e30d0a7f0087
parent 0 b97547f5f1fa
permissions -rwxr-xr-x
[config] turn the internal configuration methods that build the appobjects search path into normal methods rather than class methods: being class methods was useless and made path filters difficult to override (we had to override the class attribute, as instance attributes were not seen). Also rename the related methods for consistency with the current vocabulary

#!/usr/bin/python

"""usage: fix_po_encoding [filename...]

Change the encoding of the po files passed as arguments to utf-8,
rewriting each file in place.
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    """Re-encode the po file `filename` in place to the `target` charset.

    The current charset is read from the file's ``Content-Type`` header via
    `find_encoding`. When it is already equal to `target` the file is left
    untouched; otherwise the header is rewritten to declare `target` and the
    whole content is written back encoded with it.

    Raises ValueError (from `find_encoding`) when the file declares no
    charset, LookupError when the declared charset is not a known codec and
    UnicodeDecodeError when the content does not match the declared charset.
    """
    # Binary mode: the bytes are decoded explicitly below, using the charset
    # the file itself declares.
    with open(filename, 'rb') as fdesc:
        raw = fdesc.read()
    # latin-1 maps every byte to a code point, so this decoding cannot fail;
    # it is only used to locate the charset declaration in the header.
    encoding = find_encoding(raw.decode('latin-1'))
    if encoding == target:
        return
    # Decode and patch the header *before* reopening the file for writing, so
    # that a decoding failure leaves the original file untouched.
    text = fix_encoding(raw.decode(encoding), target)
    with codecs.open(filename, 'wb', encoding=target) as fdesc:
        fdesc.write(text)

# Matches the po header line declaring the charset, e.g.
#   "Content-Type: text/plain; charset=ISO-8859-1\n"
# The trailing ``\\n"`` matches the literal backslash + "n" + closing quote
# found in the po file, not a real newline. The character class accepts the
# punctuation used in charset names ("-", "_", ".", ":"), e.g. SHIFT_JIS.
_CHARSET_DECLARATION_RE = re.compile(
    r'"Content-Type:.* charset=([a-zA-Z0-9._:-]+)\\n"', re.M)


def find_encoding(data):
    """Return the charset name declared in the po file content `data`.

    The name is returned exactly as written in the ``Content-Type`` header
    (no case normalisation).

    Raises ValueError when `data` contains no charset declaration.
    """
    mo = _CHARSET_DECLARATION_RE.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    """Return `data` with its declared charset replaced by `target_encoding`.

    Only the charset value of the ``Content-Type`` header line(s) is
    changed; `data` is returned unmodified when it has no such line. The
    content itself is not re-encoded here.
    """
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)

    def declare_target(mo):
        # Build the replacement by hand rather than through a ``\1...\3``
        # template: in a template, a target name starting with a digit (e.g.
        # "437") or containing a backslash would be parsed as part of a group
        # reference or escape sequence.
        return mo.group(1) + target_encoding + mo.group(3)

    return regexp.sub(declare_target, data)
    


# Script body: handle each file named on the command line in turn, echoing
# its name on standard output before converting it.
for filename in sys.argv[1:]:
    # Parenthesized single-argument form: same output as the print statement.
    print(filename)
    change_encoding(filename)