devtools/fix_po_encoding
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Tue, 19 Jul 2016 15:53:58 +0200
branch3.22
changeset 11434 f6ba947c11ee
parent 0 b97547f5f1fa
permissions -rwxr-xr-x
[web] Fix bug with usage of os.rename under windows environment In 7c386161ebd6 we removed cache handling from property sheet and introduced usage of a tempfile + os.rename to get atomic generation of files. The problem is that this is not portable, since under windows os.rename will raise an exception if the file already exists (because there is no way to write a file atomically in such a case). This kind of thing should be out of the CW scope anyway, so implement a quick & dirty fix in the meantime. Closes #14214794

#!/usr/bin/python

"""usage: fix-po-encodings [filename...]
change the encoding of the po files passed as arguments to utf-8
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    """Re-encode the po file `filename` in place to the `target` encoding.

    The current encoding is taken from the charset declared in the file's
    Content-Type header (see `find_encoding`). When it already equals
    `target` the file is left untouched; otherwise the declaration is
    rewritten to `target` and the whole content is written back encoded
    accordingly.

    Raises ValueError (from `find_encoding`) when the file carries no
    charset declaration. The file is only opened for writing once the
    content has been decoded successfully.
    """
    # Binary mode: no newline translation, and raw bytes whatever the
    # Python version. `with` closes the file even if read() raises.
    with open(filename, 'rb') as fdesc:
        raw = fdesc.read()
    # latin-1 maps every byte to a code point, so this decode cannot fail;
    # the result is only used to locate the ASCII charset declaration.
    encoding = find_encoding(raw.decode('latin-1'))
    if encoding == target:
        return
    # Decode with the declared encoding first, then rewrite the header on
    # the decoded text so the regexp always works on text, not on bytes.
    text = fix_encoding(raw.decode(encoding), target)
    with codecs.open(filename, 'wb', encoding=target) as fdesc:
        fdesc.write(text)

def find_encoding(data):
    """Return the charset declared in the Content-Type header of `data`.

    `data` is the content of a po file, in which the header line looks like
    ``"Content-Type: text/plain; charset=NAME\\n"`` (the trailing backslash
    and ``n`` are literal characters in the file).

    Raises ValueError if no such declaration is found.
    """
    # Besides letters, digits and '-', charset names may contain '_', '.'
    # and ':' (e.g. Shift_JIS), so accept those as well.
    regexp = re.compile(
        r'"Content-Type:.* charset=([a-zA-Z0-9_.:-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    """Return `data` with its declared charset replaced by `target_encoding`.

    Every ``"Content-Type: ...; charset=NAME\\n"`` header line found in
    `data` gets NAME substituted; `data` is returned unchanged when there is
    no such line. Only the declaration is touched, the content itself is not
    re-encoded.
    """
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)

    def replace_charset(mo):
        # Build the replacement by hand rather than through a template such
        # as r'\1%s\3': a target starting with a digit (e.g. '1252') would
        # merge with the '\1' group reference, and backslashes in the target
        # would be interpreted as escapes.
        return mo.group(1) + target_encoding + mo.group(3)

    return regexp.sub(replace_charset, data)
    


def main(argv=None):
    """Convert each po file named in `argv` to the default target encoding.

    `argv` defaults to the command line arguments (``sys.argv[1:]``). Each
    file name is printed before the file is processed.
    """
    if argv is None:
        argv = sys.argv[1:]
    for filename in argv:
        # Parenthesised form: same output with the Python 2 print statement
        # and valid syntax with the Python 3 print function.
        print(filename)
        change_encoding(filename)


# Only run when executed as a script, so that importing the module has no
# side effect.
if __name__ == '__main__':
    main()