0
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 1
#!/usr/bin/python
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 2
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 3
"""usage: fix-po-encodings [filename...]
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 4
change the encoding of the po files passed as arguments to utf-8
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 5
"""
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 6
import sys
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 7
import re
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 8
import codecs
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 9
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 10
def change_encoding(filename, target='UTF-8'):
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 11
fdesc = open(filename)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 12
data = fdesc.read()
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 13
fdesc.close()
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 14
encoding = find_encoding(data)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 15
if encoding == target:
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 16
return
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 17
data = fix_encoding(data, target)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 18
data = unicode(data, encoding)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 19
fdesc = codecs.open(filename, 'wb', encoding=target)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 20
fdesc.write(data)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 21
fdesc.close()
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 22
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 23
def find_encoding(data):
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 24
regexp = re.compile(r'"Content-Type:.* charset=([a-zA-Z0-9-]+)\\n"', re.M)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 25
mo = regexp.search(data)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 26
if mo is None:
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 27
raise ValueError('No encoding declaration')
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 28
return mo.group(1)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 29
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 30
def fix_encoding(data, target_encoding):
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 31
regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 32
return regexp.sub(r'\1%s\3' % target_encoding, data)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 33
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 34
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 35
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 36
for filename in sys.argv[1:]:
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 37
print filename
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
+ − 38
change_encoding(filename)