devtools/fix_po_encoding
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Thu, 28 Jan 2016 18:17:30 +0100 (2016-01-28)
changeset 11090 b4b854c25de5
parent 0 b97547f5f1fa
permissions -rwxr-xr-x
[repository] set .eid on eschema when schema is loaded from the filesystem enforcing the contract that a repository's schema should have .eid attribute of entity schema set to the eid of the entity used to serialize them in the db. Before this cset, this was not true during tests or for some c-c commands where 'quick_start' is set (eg db-restore, i18ncube...). The change in server __init__ makes this assumption true during instance creation: the serializing code was actually setting eid on schema object, but a reference to a previously built schema was given instead of the one for the latest created repository. Closes #10450092
#!/usr/bin/python

"""usage: fix-po-encodings [filename...]
change the encoding of the po files passed as arguments to utf-8
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    fdesc = open(filename)
    data = fdesc.read()
    fdesc.close()
    encoding = find_encoding(data)
    if encoding == target:
        return
    data = fix_encoding(data, target)
    data = unicode(data, encoding)
    fdesc = codecs.open(filename, 'wb', encoding=target)
    fdesc.write(data)
    fdesc.close()

def find_encoding(data):
    regexp = re.compile(r'"Content-Type:.* charset=([a-zA-Z0-9-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)
    return regexp.sub(r'\1%s\3' % target_encoding, data)
    


for filename in sys.argv[1:]:
    print filename
    change_encoding(filename)