devtools/fix_po_encoding
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Thu, 27 Mar 2014 14:32:21 +0100
branch stable
changeset 9588 fe267b7336f3
parent 0 b97547f5f1fa
permissions -rwxr-xr-x
[migration] always rebuild inferred relations. This was skipped for some bad reason (see 12ad88615a12, which introduced the change). The fix for #231956 in Yams is necessary to allow this cset: during a migration, we want to always re-infer relations while allowing explicit redefinition of an inferred relation later. Otherwise, we may run the migration with missing parts of the schema (the ones that should have been inferred). Closes #3685463.

#!/usr/bin/python

"""usage: fix-po-encodings [filename...]
Re-encode each po file passed as argument to UTF-8, in place, and update the
charset declared in its Content-Type header accordingly.
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    """Re-encode the po file `filename` to `target`, in place.

    The current encoding is read from the charset declared in the file's
    ``Content-Type`` header. When it is already equal to `target` the file is
    left untouched; otherwise the header is updated to declare `target` and
    the whole file is rewritten using that encoding.

    :param filename: path of the po file to convert
    :param target: name of the encoding to convert the file to
    :raise ValueError: if the file has no charset declaration
    """
    # Read raw bytes: the real encoding is only known once the header has
    # been inspected. The context manager closes the file even on error.
    with open(filename, 'rb') as fdesc:
        raw = fdesc.read()
    # latin-1 maps every byte to exactly one character, so it can always be
    # used to obtain text in which the header can be searched.
    encoding = find_encoding(raw.decode('latin-1'))
    if encoding == target:
        return
    # Decode with the declared encoding first, then rewrite the header on the
    # decoded text. A decoding failure is raised before the file is reopened
    # for writing, so the original content is not truncated.
    text = fix_encoding(raw.decode(encoding), target)
    with codecs.open(filename, 'wb', encoding=target) as fdesc:
        fdesc.write(text)

def find_encoding(data):
    """Return the charset declared in the ``Content-Type`` header of a po file.

    :param data: content of the po file
    :return: the charset name, exactly as written in the header
    :raise ValueError: if no ``Content-Type`` line declaring a charset is found
    """
    # The pattern ends with the literal backslash-n and closing quote that
    # terminate a po header line. Besides alphanumerics and dashes, charset
    # names may contain '_', '.' or ':' (e.g. Shift_JIS), so accept those too.
    regexp = re.compile(r'"Content-Type:.* charset=([a-zA-Z0-9_.:-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    """Return `data` with the charset of its ``Content-Type`` header replaced.

    :param data: content of the po file
    :param target_encoding: charset name to declare in the header
    :return: a copy of `data` in which each matching ``Content-Type`` line
      declares `target_encoding`; `data` is returned as is when no line matches
    """
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)

    def _replace(mo):
        # Concatenate explicitly instead of using a replacement template: in
        # a template, an encoding name starting with a digit (e.g. '8859')
        # would fuse with the preceding group reference, and backslashes
        # would be interpreted as escapes.
        return mo.group(1) + target_encoding + mo.group(3)

    return regexp.sub(_replace, data)
    


# Convert every po file named on the command line, echoing each file name
# before it is processed.
for filename in sys.argv[1:]:
    # Function-call form: prints the same thing on Python 2 for a single
    # argument, and is the only form that parses on Python 3.
    print(filename)
    change_encoding(filename)