devtools/fix_po_encoding
author Julien Cristau <julien.cristau@logilab.fr>
Mon, 18 May 2015 16:44:49 +0200
changeset 10439 45e18b4a7466
parent 0 b97547f5f1fa
permissions -rwxr-xr-x
[devtools] change the way we start/stop postgresql Instead of having the test db handler start a cluster on demand, use the test module's setUp/tearDown callbacks to do it. This allows to have one data directory (and thus cluster) per test module, allowing different test modules to run in parallel, each using its own database cluster whose path is based on the test module.

#!/usr/bin/python

"""usage: fix-po-encodings [filename...]
change the encoding of the po files passed as arguments to utf-8
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    fdesc = open(filename)
    data = fdesc.read()
    fdesc.close()
    encoding = find_encoding(data)
    if encoding == target:
        return
    data = fix_encoding(data, target)
    data = unicode(data, encoding)
    fdesc = codecs.open(filename, 'wb', encoding=target)
    fdesc.write(data)
    fdesc.close()

def find_encoding(data):
    regexp = re.compile(r'"Content-Type:.* charset=([a-zA-Z0-9-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)
    return regexp.sub(r'\1%s\3' % target_encoding, data)
    


for filename in sys.argv[1:]:
    print filename
    change_encoding(filename)