devtools/fix_po_encoding
author Julien Cristau <julien.cristau@logilab.fr>
Wed, 21 Jan 2015 15:58:33 +0100
branch stable
changeset 10153 85cbf16fbb57
parent 0 b97547f5f1fa
permissions -rwxr-xr-x
[security] Test case and fix for an INSERT security hole. 7099bbd685aa introduced an untested corner case in which an Entity with no attribute specified could be created whatever the permissions. Report and test case by Christophe de Vienne, fix by Aurelien Campeas. Thanks! Closes #4854359

#!/usr/bin/python

"""usage: fix-po-encodings [filename...]
Change the encoding of the po files passed as arguments to UTF-8.
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    """Re-encode the po file `filename` in place to the `target` encoding.

    The current encoding is the charset declared in the file's Content-Type
    header; that header is rewritten to declare `target`. Nothing is written
    when the declared charset already equals `target`.

    :param filename: path of the po file to convert
    :param target: name of the encoding to convert the file to
    :raise ValueError: if the file contains no charset declaration
    """
    # Binary mode: the raw bytes are needed, untouched by any newline
    # translation or implicit decoding done by the file object.
    with open(filename, 'rb') as fdesc:
        raw = fdesc.read()
    # latin-1 maps every byte to exactly one code point, so the header can
    # be searched and patched as text and the bytes recovered without loss.
    text = raw.decode('latin-1')
    encoding = find_encoding(text)
    if encoding == target:
        return
    patched = fix_encoding(text, target).encode('latin-1')
    # Decode and re-encode entirely in memory before touching the file:
    # opening it for writing truncates it, so an unknown codec or an
    # unencodable character must be detected before that point.
    encoded = patched.decode(encoding).encode(target)
    with open(filename, 'wb') as fdesc:
        fdesc.write(encoded)

def find_encoding(data):
    """Return the charset declared in the Content-Type header of a po file.

    :param data: content of the po file
    :return: the charset name exactly as written in the declaration
    :raise ValueError: if no charset declaration is found
    """
    # Besides letters, digits and hyphens, charset names may contain
    # underscores, dots and colons (e.g. SHIFT_JIS, ISO_8859-1:1987).
    regexp = re.compile(r'"Content-Type:.* charset=([A-Za-z0-9_.:-]+)\\n"')
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    """Rewrite the charset declared in the Content-Type header of a po file.

    :param data: content of the po file
    :param target_encoding: charset name to write in the declaration
    :return: `data` with the charset of every Content-Type declaration
             replaced by `target_encoding`; unchanged if there is none
    """
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")')

    def replace_charset(mo):
        # Assemble the replacement by hand instead of through a template
        # string: a name starting with a digit (e.g. '1252') would be glued
        # to the "\1" back-reference and misread as another group number or
        # an octal escape, and a backslash would be taken as an escape.
        return mo.group(1) + target_encoding + mo.group(3)

    return regexp.sub(replace_charset, data)
    


# Convert every po file named on the command line, echoing each name
# before converting it.
for filename in sys.argv[1:]:
    # With a single argument the parenthesised form prints the same thing
    # as the Python 2 print statement and is also valid on Python 3.
    print(filename)
    change_encoding(filename)