devtools/fix_po_encoding
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Tue, 30 Mar 2010 19:55:20 +0200
branchstable
changeset 5090 8c39d2bf58fd
parent 0 b97547f5f1fa
permissions -rwxr-xr-x
[repo creation] removing existing entities of 'single' cardinality relatino should be considered as 'activeintegrity' hook. Also don't disable that category during repo creation to avoid pb such as two default workflows for one entity types

#!/usr/bin/python

"""usage: fix-po-encodings [filename...]
change the encoding of the po files passed as arguments to utf-8
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    fdesc = open(filename)
    data = fdesc.read()
    fdesc.close()
    encoding = find_encoding(data)
    if encoding == target:
        return
    data = fix_encoding(data, target)
    data = unicode(data, encoding)
    fdesc = codecs.open(filename, 'wb', encoding=target)
    fdesc.write(data)
    fdesc.close()

def find_encoding(data):
    regexp = re.compile(r'"Content-Type:.* charset=([a-zA-Z0-9-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)
    return regexp.sub(r'\1%s\3' % target_encoding, data)
    


for filename in sys.argv[1:]:
    print filename
    change_encoding(filename)