devtools/fix_po_encoding
author Aurelien Campeas <aurelien.campeas@logilab.fr>
Tue, 27 May 2014 18:47:24 +0200
changeset 10087 ed0b076c119b
parent 0 b97547f5f1fa
permissions -rwxr-xr-x
[rset] kill the rset._rqlst cache Right now it "works" for the standard, internal uses. However when we will fold ClientConnection and Connection, it will hurt, because suddenly we get more cache hits, and the following situation would become commonplace: * there is an un-annotated _rqlst given by the querier * some view (e.g. facets) requests the .syntax_tree, which takes a copy of _rqlst * the view actually expects the rql syntax tree to be annotated, but it was not, hence we crash. Related to #3837233.

#!/usr/bin/python

"""usage: fix-po-encodings [filename...]
change the encoding of the po files passed as arguments to utf-8
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    fdesc = open(filename)
    data = fdesc.read()
    fdesc.close()
    encoding = find_encoding(data)
    if encoding == target:
        return
    data = fix_encoding(data, target)
    data = unicode(data, encoding)
    fdesc = codecs.open(filename, 'wb', encoding=target)
    fdesc.write(data)
    fdesc.close()

def find_encoding(data):
    regexp = re.compile(r'"Content-Type:.* charset=([a-zA-Z0-9-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)
    return regexp.sub(r'\1%s\3' % target_encoding, data)
    


for filename in sys.argv[1:]:
    print filename
    change_encoding(filename)