devtools/fix_po_encoding
author Julien Cristau <julien.cristau@logilab.fr>
Mon, 07 Apr 2014 18:04:56 +0200
changeset 9636 e35ae8617c03
parent 0 b97547f5f1fa
permissions -rwxr-xr-x
Fix constraint sync during migration - restore constraints lost during merge in test schema. - use constraint_by_eid in BeforeDeleteCWConstraintHook as done in 3.17.14 for BeforeDeleteConstrainedByHook. Fixes handling of multiple constraints of the same type. - make sync_schema_props_perms() delete the CWConstraint entity instead of the constrained_by relation. In 3.19, the latter doesn't automatically result in the former just because the relation is composite. Simplify the constraint migration to delete all removed constraints and recreate new ones even if they share the same type; that optimization made the code more complicated for (AFAICT) no significant reason.

#!/usr/bin/python

"""usage: fix-po-encodings [filename...]
change the encoding of the po files passed as arguments to utf-8
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    """Re-encode the po file `filename` in place to the `target` encoding.

    The current encoding is taken from the charset declared in the file's
    ``Content-Type`` header (via find_encoding). If it already equals
    `target`, the file is left untouched. Otherwise the header is rewritten
    to declare `target` (via fix_encoding) and the whole content is written
    back to the same path, encoded as `target`.

    Any exception raised by find_encoding when no charset declaration is
    present propagates to the caller, as does the LookupError raised when
    the declared charset is not a codec known to Python.
    """
    # Read raw bytes: the real encoding is unknown until the header is parsed.
    with open(filename, 'rb') as fdesc:
        raw = fdesc.read()
    # latin-1 maps every byte to exactly one code point, so this decode never
    # fails and lets the header be searched as text.
    encoding = find_encoding(raw.decode('latin-1'))
    if encoding == target:
        return
    text = fix_encoding(raw.decode(encoding), target)
    with codecs.open(filename, 'wb', encoding=target) as fdesc:
        fdesc.write(text)

def find_encoding(data):
    """Return the charset declared in the ``Content-Type`` header of `data`.

    `data` is the content of a po file. The first line of the form
    ``"Content-Type: ... charset=NAME\\n"`` is located and NAME is returned
    exactly as written in the file.

    Raises ValueError if no such declaration is found.
    """
    # Charset names may contain '_', '.' or ':' in addition to alphanumerics
    # and '-' (e.g. SHIFT_JIS), so accept those characters as well.
    regexp = re.compile(
        r'"Content-Type:.* charset=([a-zA-Z0-9_.:-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    """Return `data` with its declared charset replaced by `target_encoding`.

    Every line of the form ``"Content-Type: ... charset=NAME\\n"`` has NAME
    replaced by `target_encoding`; the rest of `data` is returned unchanged.
    Only the declaration is edited, the text itself is not re-encoded.
    """
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)

    def declare_target(mo):
        # Build the replacement by concatenation instead of a template string
        # so that a target starting with a digit (e.g. '437') or containing a
        # backslash is inserted literally rather than parsed as an escape or
        # group reference.
        return mo.group(1) + target_encoding + mo.group(3)

    return regexp.sub(declare_target, data)
    


if __name__ == '__main__':
    # Convert every file named on the command line, echoing each name first.
    # The guard keeps the conversion from running if the file is imported.
    for filename in sys.argv[1:]:
        # The parenthesised form prints the same on Python 2 and Python 3.
        print(filename)
        change_encoding(filename)