devtools/fix_po_encoding
author Pierre-Yves David <pierre-yves.david@logilab.fr>
Wed, 27 Mar 2013 19:54:36 +0100
changeset 8809 9ee4d0c65ad2
parent 0 b97547f5f1fa
permissions -rwxr-xr-x
[session] use a dedicated class to track cnxset. We introduce a new CnxSetTracker class to track the `cnxset` used by Transaction and to allow waiting for them. This new class uses neither thread IDs nor thread joining to work. This allows using multiple transactions per thread and a transaction in multiple threads. The class itself is fully thread-safe, but the Transaction is still not thread-safe. The old _threads_in_transaction attribute is dropped in favor of new logic based on this object. The registration of the cnxset used is not done by the Transaction itself. tx.cnxset is a property that keeps its value consistent with the CnxSetTracker instance. Note: the CnxSetTracker instance only tracks transaction ids, not the transactions themselves, so no reference cycles are created.

#!/usr/bin/python

"""usage: fix-po-encodings [filename...]
Convert the po files passed as arguments to UTF-8 and update their charset declaration.
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    """Re-encode the po file `filename` to `target`, updating its header.

    The current encoding is taken from the charset declared in the file's
    ``Content-Type`` header (see `find_encoding`). When it designates the
    same codec as `target` the file is left untouched. Otherwise the content
    is decoded, its charset declaration is rewritten by `fix_encoding`, and
    the result is written back in place, encoded as `target`.

    Raises ValueError (from `find_encoding`) when the file declares no
    charset, and LookupError when the declared or the requested encoding is
    unknown to Python.
    """
    # Read raw bytes: the encoding is unknown until the header is parsed.
    with open(filename, 'rb') as fdesc:
        raw = fdesc.read()
    # latin-1 maps every byte to exactly one character, so this decoding
    # cannot fail and keeps the ASCII header searchable as text.
    encoding = find_encoding(raw.decode('latin-1'))
    # Compare canonical codec names so that spelling variants of the same
    # encoding ('utf-8', 'UTF8', ...) are recognised as already converted.
    if codecs.lookup(encoding).name == codecs.lookup(target).name:
        return
    # Decode before reopening the file for writing, so that a decoding
    # error cannot leave a truncated file behind.
    text = fix_encoding(raw.decode(encoding), target)
    with codecs.open(filename, 'wb', encoding=target) as fdesc:
        fdesc.write(text)

def find_encoding(data):
    """Return the charset declared in the ``Content-Type`` header of `data`.

    `data` is the textual content of a po file. The first header line of
    the form ``"Content-Type: ... charset=NAME`` followed by a literal
    backslash-n and a closing double quote is located, and NAME is returned
    exactly as written in the file.

    Raises ValueError when no such declaration is found.
    """
    # Charset names may contain '_', '.' and ':' (e.g. SHIFT_JIS,
    # ISO_8859-1) in addition to letters, digits and '-'.
    regexp = re.compile(
        r'"Content-Type:.* charset=([a-zA-Z0-9_.:-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    """Return `data` with its declared charset replaced by `target_encoding`.

    Every ``Content-Type`` header line carrying a ``charset=`` declaration
    has the charset value substituted; the rest of `data` is returned
    unchanged. Only the declaration is edited: the content itself is not
    re-encoded here.
    """
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)

    def _replace(mo):
        # Build the replacement by concatenation instead of a template:
        # a template would merge a leading digit of the encoding name into
        # the group reference and would interpret backslashes.
        return mo.group(1) + target_encoding + mo.group(3)

    return regexp.sub(_replace, data)
    


if __name__ == '__main__':
    # Convert every po file named on the command line. Each name is echoed
    # before processing so a failure can be traced to the offending file.
    for filename in sys.argv[1:]:
        # The parenthesised form prints the same text on Python 2 and 3.
        print(filename)
        change_encoding(filename)