cubicweb/devtools/fix_po_encoding
author Jérémy Bobbio <jeremy.bobbio@irq7.fr>
Wed, 19 Jun 2019 15:44:17 +0200
changeset 12655 5b0ce10a7046
parent 11057 0b59724cb3f2
child 12779 44147dab9d27
permissions -rwxr-xr-x
[crypto] Use Cryptodome namespace instead of Crypto PyCryptodome comes in two flavors: “an almost drop-in replacement for the old PyCrypto library” and “a library independent of the old PyCrypto”. The former uses the Crypto namespace, and is shipped as `pycryptodome` while the latter uses Cryptodome instead and lies in the `pycryptodomex` package. Given the reason to switch to PyCryptodome is that PyCrypto in unmaintained, its probably better to avoid any mistake and mandate the specific usage of the Cryptodome namespace by requiring `pycryptodomex` instead of `pycryptodome`. A more present reason is that Debian buster will only provide a package with the separate namespace flavor. The current Recommends is not working with the current code. Although it's important to note that the package name will probably have to be changed to `python3-pycryptodomex` once https://bugs.debian.org/886291 is solved.

#!/usr/bin/python

"""usage: fix-po-encodings [filename...]
change the encoding of the po files passed as arguments to utf-8
"""
import sys
import re
import codecs

def change_encoding(filename, target='UTF-8'):
    fdesc = open(filename)
    data = fdesc.read()
    fdesc.close()
    encoding = find_encoding(data)
    if encoding == target:
        return
    data = fix_encoding(data, target)
    data = unicode(data, encoding)
    fdesc = codecs.open(filename, 'wb', encoding=target)
    fdesc.write(data)
    fdesc.close()

def find_encoding(data):
    regexp = re.compile(r'"Content-Type:.* charset=([a-zA-Z0-9-]+)\\n"', re.M)
    mo = regexp.search(data)
    if mo is None:
        raise ValueError('No encoding declaration')
    return mo.group(1)

def fix_encoding(data, target_encoding):
    regexp = re.compile(r'("Content-Type:.* charset=)(.*)(\\n")', re.M)
    return regexp.sub(r'\1%s\3' % target_encoding, data)
    


for filename in sys.argv[1:]:
    print filename
    change_encoding(filename)