devtools/fix_po_encoding
author Alexandre Fayolle <alexandre.fayolle@logilab.fr>
Wed, 02 Jun 2010 16:12:18 +0000
branchstable
changeset 5639 4acb860159e4
parent 0 b97547f5f1fa
permissions -rwxr-xr-x
[win32] fix deadlock occurring on the sequence tables with SQLServer. Actually, this deadlock would occur with any db backend other than PostgreSQL, as the previous code was heavily relying on PG's SEQUENCE facility, not available elsewhere. Deadlock description: Thread1 starts creating entities (and therefore calls create_eid): -> this creates a DB-level lock on the entities_id_seq table, which will last until the end of the transaction. Thread2 calls create_eid, which acquires the Python lock object, but updating the entities_id_seq table is blocked by the DB lock. Thread1 wants to create a new entity, calls create_eid, and is stuck on the Python lock object held by Thread2. Solution: use a separate connection to read and write the entities_id_seq table.

#!/usr/bin/python

"""usage: fix-po-encodings [filename...]
change the encoding of the po files passed as arguments to utf-8
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    """Re-encode the po file `filename` in place to the `target` encoding.

    The current encoding is the charset declared in the file's
    "Content-Type" header (as returned by find_encoding); that declaration
    is rewritten to `target` and the whole content is converted.  The file
    is left untouched when the declared charset is exactly `target`.

    Raises ValueError (from find_encoding) when the file carries no charset
    declaration.
    """
    # Read raw bytes: the content is decoded explicitly below, so no
    # text-mode translation should happen on the way in.
    fdesc = open(filename, 'rb')
    try:
        raw = fdesc.read()
    finally:
        fdesc.close()
    # latin-1 maps every byte to exactly one character, which lets the
    # header be searched as text before the real encoding is known.
    encoding = find_encoding(raw.decode('latin-1'))
    if encoding == target:
        return
    text = fix_encoding(raw.decode(encoding), target)
    # Encode before reopening the file: opening for writing truncates it,
    # so a failure to encode must not happen after that point.
    encoded = text.encode(target)
    fdesc = open(filename, 'wb')
    try:
        fdesc.write(encoded)
    finally:
        fdesc.close()

def find_encoding(data):
    """Return the charset name declared in the po content `data`.

    The name is taken from the first "Content-Type: ... charset=NAME"
    header string found in `data`.

    Raises ValueError when no such declaration is found.
    """
    # Charset names may contain '_', '.' and ':' in addition to
    # alphanumerics and '-' (e.g. ISO_8859-15); accept them so such
    # headers are not reported as missing.
    regexp = re.compile(
        r'"Content-Type:.* charset=([a-zA-Z0-9_.:-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    """Return `data` with its declared charset replaced by `target_encoding`.

    Every "Content-Type: ... charset=NAME" header string found in `data`
    gets NAME substituted; `data` is returned unchanged when there is none.
    """
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)

    def _replace(mo):
        # Build the replacement by hand: putting target_encoding into a
        # replacement template would let a leading digit merge with the
        # group number and any backslash be read as an escape.
        return mo.group(1) + target_encoding + mo.group(3)

    return regexp.sub(_replace, data)
    


# Script entry point: convert each po file named on the command line,
# echoing its name first so progress (and the culprit of any failure)
# is visible.
if __name__ == '__main__':
    for filename in sys.argv[1:]:
        # Parenthesised single argument prints identically under the
        # print statement and the print function.
        print(filename)
        change_encoding(filename)