# HG changeset patch # User Julien Cristau # Date 1445008360 -7200 # Node ID 0768bf2333a750752d54d096460477758f428221 # Parent 359cbdf3a515fe79d53d511bdd3550d9734f945d [dataimport] give unicode objects to psycopg2 copy_from diff -r 359cbdf3a515 -r 0768bf2333a7 dataimport/pgstore.py --- a/dataimport/pgstore.py Fri Oct 16 17:06:12 2015 +0200 +++ b/dataimport/pgstore.py Fri Oct 16 17:12:40 2015 +0200 @@ -21,13 +21,13 @@ import threading import warnings import os.path as osp -from StringIO import StringIO +from io import StringIO from time import asctime from datetime import date, datetime, time from collections import defaultdict from base64 import b64encode -from six import string_types, integer_types +from six import string_types, integer_types, text_type from six.moves import cPickle as pickle, range from cubicweb.utils import make_uid @@ -72,9 +72,9 @@ _execmany_thread_not_copy_from(cu, statement, data) else: if columns is None: - cu.copy_from(buf, table, null='NULL') + cu.copy_from(buf, table, null=u'NULL') else: - cu.copy_from(buf, table, null='NULL', columns=columns) + cu.copy_from(buf, table, null=u'NULL', columns=columns) def _execmany_thread(sql_connect, statements, dump_output_dir=None, support_copy_from=True, encoding='utf-8'): @@ -124,44 +124,38 @@ def _copyfrom_buffer_convert_None(value, **opts): '''Convert None value to "NULL"''' - return 'NULL' + return u'NULL' def _copyfrom_buffer_convert_number(value, **opts): '''Convert a number into its string representation''' - return str(value) + return text_type(value) def _copyfrom_buffer_convert_string(value, **opts): '''Convert string value. - - Recognized keywords: - :encoding: resulting string encoding (default: utf-8) ''' - encoding = opts.get('encoding','utf-8') escape_chars = ((u'\\', u'\\\\'), (u'\t', u'\\t'), (u'\r', u'\\r'), (u'\n', u'\\n')) for char, replace in escape_chars: value = value.replace(char, replace) - if isinstance(value, unicode): - value = value.encode(encoding) return value def _copyfrom_buffer_convert_date(value, **opts): '''Convert date into "YYYY-MM-DD"''' # Do not use strftime, as it yields issue with date < 1900 # (http://bugs.python.org/issue1777412) - return '%04d-%02d-%02d' % (value.year, value.month, value.day) + return u'%04d-%02d-%02d' % (value.year, value.month, value.day) def _copyfrom_buffer_convert_datetime(value, **opts): '''Convert date into "YYYY-MM-DD HH:MM:SS.UUUUUU"''' # Do not use strftime, as it yields issue with date < 1900 # (http://bugs.python.org/issue1777412) - return '%s %s' % (_copyfrom_buffer_convert_date(value, **opts), - _copyfrom_buffer_convert_time(value, **opts)) + return u'%s %s' % (_copyfrom_buffer_convert_date(value, **opts), + _copyfrom_buffer_convert_time(value, **opts)) def _copyfrom_buffer_convert_time(value, **opts): '''Convert time into "HH:MM:SS.UUUUUU"''' - return '%02d:%02d:%02d.%06d' % (value.hour, value.minute, - value.second, value.microsecond) + return u'%02d:%02d:%02d.%06d' % (value.hour, value.minute, + value.second, value.microsecond) # (types, converter) list. _COPYFROM_BUFFER_CONVERTERS = [ @@ -211,6 +205,7 @@ for types, converter in _COPYFROM_BUFFER_CONVERTERS: if isinstance(value, types): value = converter(value, **convert_opts) + assert isinstance(value, text_type) break else: raise ValueError("Unsupported value type %s" % type(value)) diff -r 359cbdf3a515 -r 0768bf2333a7 dataimport/test/test_pgstore.py --- a/dataimport/test/test_pgstore.py Fri Oct 16 17:06:12 2015 +0200 +++ b/dataimport/test/test_pgstore.py Fri Oct 16 17:12:40 2015 +0200 @@ -32,25 +32,24 @@ def test_convert_none(self): cnvt = pgstore._copyfrom_buffer_convert_None - self.assertEqual('NULL', cnvt(None)) + self.assertEqual(u'NULL', cnvt(None)) def test_convert_number(self): cnvt = pgstore._copyfrom_buffer_convert_number - self.assertEqual('42', cnvt(42)) + self.assertEqual(u'42', cnvt(42)) if PY2: - self.assertEqual('42', cnvt(long(42))) - self.assertEqual('42.42', cnvt(42.42)) + self.assertEqual(u'42', cnvt(long(42))) + self.assertEqual(u'42.42', cnvt(42.42)) def test_convert_string(self): cnvt = pgstore._copyfrom_buffer_convert_string # simple - self.assertEqual('babar', cnvt('babar')) + self.assertEqual(u'babar', cnvt('babar')) # unicode - self.assertEqual('\xc3\xa9l\xc3\xa9phant', cnvt(u'éléphant')) - self.assertEqual('\xe9l\xe9phant', cnvt(u'éléphant', encoding='latin1')) + self.assertEqual(u'éléphant', cnvt(u'éléphant')) # escaping - self.assertEqual('babar\\tceleste\\n', cnvt('babar\tceleste\n')) - self.assertEqual(r'C:\\new\tC:\\test', cnvt('C:\\new\tC:\\test')) + self.assertEqual(u'babar\\tceleste\\n', cnvt(u'babar\tceleste\n')) + self.assertEqual(u'C:\\\\new\\tC:\\\\test', cnvt(u'C:\\new\tC:\\test')) def test_convert_date(self): cnvt = pgstore._copyfrom_buffer_convert_date @@ -73,12 +72,12 @@ DT.datetime(2014, 1, 1, 0, 0, 0))) results = pgstore._create_copyfrom_buffer(data) # all columns - expected = '''42\t42\t42.42\téléphant\t0666-01-13\t06:06:06.000000\t0666-06-13 06:06:06.000000 + expected = u'''42\t42\t42.42\téléphant\t0666-01-13\t06:06:06.000000\t0666-06-13 06:06:06.000000 6\t6\t6.6\tbabar\t2014-01-14\t04:02:01.000000\t2014-01-01 00:00:00.000000''' self.assertMultiLineEqual(expected, results.getvalue()) # selected columns results = pgstore._create_copyfrom_buffer(data, columns=(1, 3, 6)) - expected = '''42\téléphant\t0666-06-13 06:06:06.000000 + expected = u'''42\téléphant\t0666-06-13 06:06:06.000000 6\tbabar\t2014-01-01 00:00:00.000000''' self.assertMultiLineEqual(expected, results.getvalue()) @@ -88,8 +87,8 @@ dict(integer=6, double=6.6, text=u'babar', date=DT.datetime(2014, 1, 1, 0, 0, 0))) results = pgstore._create_copyfrom_buffer(data, ('integer', 'text')) - expected = '''42\téléphant\n6\tbabar''' - self.assertMultiLineEqual(expected, results.getvalue()) + expected = u'''42\téléphant\n6\tbabar''' + self.assertEqual(expected, results.getvalue()) if __name__ == '__main__': unittest_main()