devtools/fix_po_encoding
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Tue, 21 Jun 2011 10:57:25 +0200
changeset 7543 570522300e22
parent 0 b97547f5f1fa
permissions -rwxr-xr-x
[ms, entity metas] add 'actual source' to entities table / base entity metadata cache. Closes #1767090 this is needed since for entities from 'copy based sources' such as datafeed, we want entity.cw_metainformation() to return as 'source' the datafeed source, not the system source (ie the source where the entity is actually stored). For both performance and bootstraping reasons, we should store this information in the `entities` table and in the _type_source cache.

#!/usr/bin/python

"""usage: fix-po-encodings [filename...]
change the encoding of the po files passed as arguments to utf-8
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    fdesc = open(filename)
    data = fdesc.read()
    fdesc.close()
    encoding = find_encoding(data)
    if encoding == target:
        return
    data = fix_encoding(data, target)
    data = unicode(data, encoding)
    fdesc = codecs.open(filename, 'wb', encoding=target)
    fdesc.write(data)
    fdesc.close()

def find_encoding(data):
    regexp = re.compile(r'"Content-Type:.* charset=([a-zA-Z0-9-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)
    return regexp.sub(r'\1%s\3' % target_encoding, data)
    


for filename in sys.argv[1:]:
    print filename
    change_encoding(filename)