equal
deleted
inserted
replaced
|
1 #!/usr/bin/python |
|
2 |
|
3 """usage: fix-po-encodings [filename...] |
|
4 change the encoding of the po files passed as arguments to utf-8 |
|
5 """ |
|
6 import sys |
|
7 import re |
|
8 import codecs |
|
9 |
|
def change_encoding(filename, target='UTF-8'):
    """Re-encode the PO file *filename* in place so it uses *target*.

    The current encoding is the ``charset=`` value found in the file's
    ``Content-Type`` header.  When that value already equals *target* the
    file is left untouched.  Otherwise the header is rewritten to declare
    *target* and the whole content is transcoded.

    Args:
        filename: path of the PO file to convert.
        target: name of the encoding to convert to.

    Raises:
        ValueError: if the file has no charset declaration.
        LookupError: if the declared or the target encoding is unknown.
        UnicodeError: if the content cannot be decoded or encoded.
    """
    # Read raw bytes: the real encoding is unknown until the header is
    # parsed, and ``with`` guarantees the handle is closed on any error.
    with open(filename, 'rb') as fdesc:
        raw = fdesc.read()

    # latin-1 maps every byte to one code point, so this decode cannot fail
    # and leaves the ASCII header readable for the regex search.
    encoding = find_encoding(raw.decode('latin-1'))
    if encoding == target:
        return

    # Decode first, then patch the header on real text, so the regex never
    # matches inside a multi-byte character.
    text = fix_encoding(raw.decode(encoding), target)

    # Encode *before* reopening the file for writing: if the target cannot
    # represent the content, the original file must not be truncated.
    encoded = text.encode(target)
    with open(filename, 'wb') as fdesc:
        fdesc.write(encoded)
|
22 |
|
def find_encoding(data):
    """Return the charset declared in the PO header contained in *data*.

    Searches for the first ``"Content-Type: ...; charset=NAME"`` header line
    (terminated by a literal backslash-n escape and a closing quote) and
    returns NAME exactly as written in the file.

    Args:
        data: the text of a PO file.

    Raises:
        ValueError: if *data* contains no such declaration.
    """
    # Charset names are not limited to letters, digits and '-': registered
    # names also use '_', '.', ':' and '+' (e.g. Shift_JIS, ISO_8859-1:1987).
    regexp = re.compile(
        r'"Content-Type:.* charset=([a-zA-Z0-9_.:+-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)
|
29 |
|
def fix_encoding(data, target_encoding):
    """Return *data* with its charset declaration set to *target_encoding*.

    Every ``"Content-Type: ...; charset=..."`` header line in *data* has the
    text after ``charset=`` replaced by *target_encoding*.  The rest of the
    line and the rest of *data* are returned unchanged.

    Args:
        data: the text of a PO file.
        target_encoding: charset name to write into the header.
    """
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)

    def _declare_target(mo):
        # Concatenate instead of formatting a replacement template: a target
        # starting with a digit (e.g. '8859') would otherwise fuse with the
        # '\1' group reference, and a backslash would be read as an escape.
        return mo.group(1) + target_encoding + mo.group(3)

    return regexp.sub(_declare_target, data)
|
33 |
|
34 |
|
35 |
|
# Script body: convert every PO file named on the command line, echoing each
# file name before it is processed.
for filename in sys.argv[1:]:
    # Call form of print: same output on Python 2, and valid on Python 3
    # where the statement form is a syntax error.
    print(filename)
    change_encoding(filename)