0
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
1 |
#!/usr/bin/python
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
2 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
3 |
"""usage: fix-po-encodings [filename...]
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
4 |
change the encoding of the po files passed as arguments to utf-8
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
5 |
"""
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
6 |
import sys
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
7 |
import re
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
8 |
import codecs
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
9 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
10 |
def change_encoding(filename, target='UTF-8'):
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
11 |
fdesc = open(filename)
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
12 |
data = fdesc.read()
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
13 |
fdesc.close()
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
14 |
encoding = find_encoding(data)
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
15 |
if encoding == target:
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
16 |
return
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
17 |
data = fix_encoding(data, target)
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
18 |
data = unicode(data, encoding)
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
19 |
fdesc = codecs.open(filename, 'wb', encoding=target)
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
20 |
fdesc.write(data)
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
21 |
fdesc.close()
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
22 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
23 |
def find_encoding(data):
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
24 |
regexp = re.compile(r'"Content-Type:.* charset=([a-zA-Z0-9-]+)\\n"', re.M)
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
25 |
mo = regexp.search(data)
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
26 |
if mo is None:
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
27 |
raise ValueError('No encoding declaration')
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
28 |
return mo.group(1)
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
29 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
30 |
def fix_encoding(data, target_encoding):
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
31 |
regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
32 |
return regexp.sub(r'\1%s\3' % target_encoding, data)
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
33 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
34 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
35 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
36 |
for filename in sys.argv[1:]:
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
37 |
print filename
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
38 |
change_encoding(filename)
|