cubicweb/devtools/fix_po_encoding
changeset 11057 0b59724cb3f2
parent 0 b97547f5f1fa
child 12779 44147dab9d27
equal deleted inserted replaced
11052:058bb3dc685f 11057:0b59724cb3f2
       
     1 #!/usr/bin/python
       
     2 
       
     3 """usage: fix-po-encodings [filename...]
       
     4 change the encoding of the po files passed as arguments to utf-8
       
     5 """
       
     6 import sys
       
     7 import re
       
     8 import codecs
       
     9 
       
    10 def change_encoding(filename, target='UTF-8'):
       
    11     fdesc = open(filename)
       
    12     data = fdesc.read()
       
    13     fdesc.close()
       
    14     encoding = find_encoding(data)
       
    15     if encoding == target:
       
    16         return
       
    17     data = fix_encoding(data, target)
       
    18     data = unicode(data, encoding)
       
    19     fdesc = codecs.open(filename, 'wb', encoding=target)
       
    20     fdesc.write(data)
       
    21     fdesc.close()
       
    22 
       
    23 def find_encoding(data):
       
    24     regexp = re.compile(r'"Content-Type:.* charset=([a-zA-Z0-9-]+)\\n"', re.M)
       
    25     mo = regexp.search(data)
       
    26     if mo is None:
       
    27         raise ValueError('No encoding declaration')
       
    28     return mo.group(1)
       
    29 
       
    30 def fix_encoding(data, target_encoding):
       
    31     regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)
       
    32     return regexp.sub(r'\1%s\3' % target_encoding, data)
       
    33     
       
    34 
       
    35 
       
    36 for filename in sys.argv[1:]:
       
    37     print filename
       
    38     change_encoding(filename)