devtools/fix_po_encoding
author Julien Cristau <julien.cristau@logilab.fr>
Wed, 10 Sep 2014 11:34:32 +0200
changeset 10076 3810332ef42c
parent 0 b97547f5f1fa
permissions -rwxr-xr-x
[server] fix 'cnx' variable confusion in DBG_SQL exception case The rollback handling expects 'cnx' to be the cubicweb Connection, but the DBG_SQL block was replacing it with an sql connection, leading to lulz down the line. Also remove obsolete getattr (the sqlite wrapping is now done at the cnxset level, so cnx.cnxset.cnx should be the right thing already).

#!/usr/bin/python

"""usage: fix-po-encodings [filename...]
change the encoding of the po files passed as arguments to utf-8
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    """Re-encode the po file `filename` to `target`, in place.

    The current encoding is read from the ``charset`` value of the file's
    ``Content-Type`` header line (see `find_encoding`).  The file content is
    decoded with that encoding, the header is rewritten to announce `target`
    (see `fix_encoding`) and the result is written back encoded as `target`.

    The file is left untouched when the declared charset is already exactly
    `target`.

    :param filename: path of the po file to convert
    :param target: name of the encoding to convert the file to
    :raises ValueError: propagated from `find_encoding` when the file
      carries no charset declaration
    """
    # Work on raw bytes: the real encoding is only known once the header
    # has been inspected.
    with open(filename, 'rb') as fdesc:
        raw = fdesc.read()
    # latin-1 maps every byte to exactly one code point, so this decode can
    # never fail; it is only used to search for the charset declaration.
    encoding = find_encoding(raw.decode('latin-1'))
    if encoding == target:
        return
    text = fix_encoding(raw.decode(encoding), target)
    # Encode *before* reopening the file for writing: if the content cannot
    # be represented in `target`, the error is raised while the original
    # file is still intact instead of after it has been truncated.
    encoded = text.encode(target)
    with open(filename, 'wb') as fdesc:
        fdesc.write(encoded)

def find_encoding(data):
    """Return the charset declared in the po header found in `data`.

    The charset is taken from the first line of the form
    ``"Content-Type: ...; charset=NAME\\n"``, where NAME is made of ASCII
    letters, digits, dashes and underscores (the latter so that names such
    as ``SHIFT_JIS`` are recognised).

    :param data: content of a po file, as a string
    :returns: the declared charset name, exactly as written in the header
    :raises ValueError: if no charset declaration is found
    """
    # `\\n"` matches the literal backslash-n escape that ends a po header
    # string, not an actual newline character.
    regexp = re.compile(r'"Content-Type:.* charset=([a-zA-Z0-9_-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    """Return `data` with its po charset declaration set to `target_encoding`.

    Every line of the form ``"Content-Type: ...; charset=NAME\\n"`` has NAME
    replaced by `target_encoding`; the rest of `data` is returned unchanged.
    Only the declaration is rewritten, the text itself is not re-encoded.

    :param data: content of a po file, as a string
    :param target_encoding: charset name to write in the header
    :returns: the content with the updated charset declaration
    """
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)

    def declare_target(mo):
        # Build the replacement by hand rather than through a template
        # string: a target starting with a digit (or containing a
        # backslash) would otherwise be parsed as part of a group
        # reference or an escape sequence.
        return mo.group(1) + target_encoding + mo.group(3)

    return regexp.sub(declare_target, data)
    


# Convert every po file named on the command line, echoing each name first
# so that a failure can be traced back to the file being processed.
for filename in sys.argv[1:]:
    # Parenthesised single-argument form: prints the same thing whether
    # `print` is the Python 2 statement or the Python 3 function.
    print(filename)
    change_encoding(filename)