devtools/fix_po_encoding
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Thu, 22 Sep 2011 16:12:23 +0200
changeset 7827 9bbf83f68bcc
parent 0 b97547f5f1fa
permissions -rwxr-xr-x
[entity] upgrade fetch_[unrelated_]order to benefit from changes introduced in 3.14 (closes #1942758) of rql generation parts of the ORM now based on rql syntax tree. This allows more powerful and flexible sort control by giving them the syntax tree instead of manipulating string. Also: * prefix new methods by 'cw_' * fix cases that currently crash in 3.14 due to the refactoring

#!/usr/bin/python

"""usage: fix_po_encoding [filename...]

Convert the PO files passed as arguments to UTF-8, in place.
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    """Re-encode the PO file *filename* in place to the *target* charset.

    The charset currently declared in the file's ``Content-Type`` header is
    looked up with find_encoding(); if it already equals *target* (exact,
    case-sensitive comparison) the file is left untouched.  Otherwise the
    header is rewritten to declare *target* and the whole content is
    transcoded and written back over the original file.

    :param filename: path of the PO file to convert
    :param target: charset name to convert to (also written in the header)
    :raises ValueError: propagated from find_encoding() when the file has no
        charset declaration
    """
    # Read raw bytes: decoding is done explicitly below, and the file is
    # written back in binary mode, so no newline translation must happen on
    # the way in either.  ``with`` guarantees the handle is released even if
    # reading fails.
    with open(filename, 'rb') as fdesc:
        data = fdesc.read()
    encoding = find_encoding(data)
    if encoding == target:
        return
    # The header is rewritten before decoding, on the still-encoded data.
    data = fix_encoding(data, target)
    # Decode *before* reopening the file for writing: if the declared charset
    # is wrong or unknown this raises while the original file is still intact.
    text = data.decode(encoding)
    with codecs.open(filename, 'wb', encoding=target) as fdesc:
        fdesc.write(text)

def find_encoding(data):
    """Return the charset name declared in the PO header found in *data*.

    The declaration is searched as a quoted header line of the form
    ``"Content-Type: ...; charset=NAME\\n"`` (the trailing ``\\n`` being the
    two literal characters backslash and ``n``, as written in PO files).
    The name is returned exactly as spelled in the file.

    :param data: content of a PO file
    :raises ValueError: if no such declaration is present
    """
    # Charset names are not limited to alphanumerics and dashes: names such
    # as ``Shift_JIS`` or ``ISO_8859-1:1987`` also use ``_``, ``.``, ``:``
    # and ``+``, so accept those as well.
    regexp = re.compile(
        r'"Content-Type:.* charset=([a-zA-Z0-9_.:+-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    """Return *data* with the PO header charset replaced by *target_encoding*.

    Every quoted ``"Content-Type: ...; charset=NAME\\n"`` header line gets its
    ``NAME`` part substituted; the rest of *data* is returned unchanged.  If
    no such line exists, *data* is returned as is.

    :param data: content of a PO file
    :param target_encoding: charset name to write in the declaration
    """
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)

    def _declare_target(mo):
        # Build the replacement by concatenation rather than through a
        # ``\1...\3`` template: a target starting with a digit (e.g. the
        # ``8859`` codec alias) would otherwise fuse with ``\1`` into a bogus
        # group reference, and backslashes would be read as escapes.
        return mo.group(1) + target_encoding + mo.group(3)

    return regexp.sub(_declare_target, data)
    


if __name__ == '__main__':
    # Convert every file named on the command line, echoing each path before
    # it is processed.  The guard keeps importing this module free of side
    # effects.
    for filename in sys.argv[1:]:
        # Parenthesised single-argument form: prints the same text whether
        # ``print`` is a statement or a function.
        print(filename)
        change_encoding(filename)