[dataimport] give unicode objects to psycopg2 copy_from
author Julien Cristau <julien.cristau@logilab.fr>
Fri, 16 Oct 2015 17:12:40 +0200
changeset 10810 0768bf2333a7
parent 10809 359cbdf3a515
child 10811 5a1b4361a797
[dataimport] give unicode objects to psycopg2 copy_from
dataimport/pgstore.py
dataimport/test/test_pgstore.py
--- a/dataimport/pgstore.py	Fri Oct 16 17:06:12 2015 +0200
+++ b/dataimport/pgstore.py	Fri Oct 16 17:12:40 2015 +0200
@@ -21,13 +21,13 @@
 import threading
 import warnings
 import os.path as osp
-from StringIO import StringIO
+from io import StringIO
 from time import asctime
 from datetime import date, datetime, time
 from collections import defaultdict
 from base64 import b64encode
 
-from six import string_types, integer_types
+from six import string_types, integer_types, text_type
 from six.moves import cPickle as pickle, range
 
 from cubicweb.utils import make_uid
@@ -72,9 +72,9 @@
         _execmany_thread_not_copy_from(cu, statement, data)
     else:
         if columns is None:
-            cu.copy_from(buf, table, null='NULL')
+            cu.copy_from(buf, table, null=u'NULL')
         else:
-            cu.copy_from(buf, table, null='NULL', columns=columns)
+            cu.copy_from(buf, table, null=u'NULL', columns=columns)
 
 def _execmany_thread(sql_connect, statements, dump_output_dir=None,
                      support_copy_from=True, encoding='utf-8'):
@@ -124,44 +124,38 @@
 
 def _copyfrom_buffer_convert_None(value, **opts):
     '''Convert None value to "NULL"'''
-    return 'NULL'
+    return u'NULL'
 
 def _copyfrom_buffer_convert_number(value, **opts):
     '''Convert a number into its string representation'''
-    return str(value)
+    return text_type(value)
 
 def _copyfrom_buffer_convert_string(value, **opts):
     '''Convert string value.
-
-    Recognized keywords:
-    :encoding: resulting string encoding (default: utf-8)
     '''
-    encoding = opts.get('encoding','utf-8')
     escape_chars = ((u'\\', u'\\\\'), (u'\t', u'\\t'), (u'\r', u'\\r'),
                     (u'\n', u'\\n'))
     for char, replace in escape_chars:
         value = value.replace(char, replace)
-    if isinstance(value, unicode):
-        value = value.encode(encoding)
     return value
 
 def _copyfrom_buffer_convert_date(value, **opts):
     '''Convert date into "YYYY-MM-DD"'''
     # Do not use strftime, as it yields issue with date < 1900
     # (http://bugs.python.org/issue1777412)
-    return '%04d-%02d-%02d' % (value.year, value.month, value.day)
+    return u'%04d-%02d-%02d' % (value.year, value.month, value.day)
 
 def _copyfrom_buffer_convert_datetime(value, **opts):
     '''Convert date into "YYYY-MM-DD HH:MM:SS.UUUUUU"'''
     # Do not use strftime, as it yields issue with date < 1900
     # (http://bugs.python.org/issue1777412)
-    return '%s %s' % (_copyfrom_buffer_convert_date(value, **opts),
-                      _copyfrom_buffer_convert_time(value, **opts))
+    return u'%s %s' % (_copyfrom_buffer_convert_date(value, **opts),
+                       _copyfrom_buffer_convert_time(value, **opts))
 
 def _copyfrom_buffer_convert_time(value, **opts):
     '''Convert time into "HH:MM:SS.UUUUUU"'''
-    return '%02d:%02d:%02d.%06d' % (value.hour, value.minute,
-                                    value.second, value.microsecond)
+    return u'%02d:%02d:%02d.%06d' % (value.hour, value.minute,
+                                     value.second, value.microsecond)
 
 # (types, converter) list.
 _COPYFROM_BUFFER_CONVERTERS = [
@@ -211,6 +205,7 @@
             for types, converter in _COPYFROM_BUFFER_CONVERTERS:
                 if isinstance(value, types):
                     value = converter(value, **convert_opts)
+                    assert isinstance(value, text_type)
                     break
             else:
                 raise ValueError("Unsupported value type %s" % type(value))
--- a/dataimport/test/test_pgstore.py	Fri Oct 16 17:06:12 2015 +0200
+++ b/dataimport/test/test_pgstore.py	Fri Oct 16 17:12:40 2015 +0200
@@ -32,25 +32,24 @@
 
     def test_convert_none(self):
         cnvt = pgstore._copyfrom_buffer_convert_None
-        self.assertEqual('NULL', cnvt(None))
+        self.assertEqual(u'NULL', cnvt(None))
 
     def test_convert_number(self):
         cnvt = pgstore._copyfrom_buffer_convert_number
-        self.assertEqual('42', cnvt(42))
+        self.assertEqual(u'42', cnvt(42))
         if PY2:
-            self.assertEqual('42', cnvt(long(42)))
-        self.assertEqual('42.42', cnvt(42.42))
+            self.assertEqual(u'42', cnvt(long(42)))
+        self.assertEqual(u'42.42', cnvt(42.42))
 
     def test_convert_string(self):
         cnvt = pgstore._copyfrom_buffer_convert_string
         # simple
-        self.assertEqual('babar', cnvt('babar'))
+        self.assertEqual(u'babar', cnvt('babar'))
         # unicode
-        self.assertEqual('\xc3\xa9l\xc3\xa9phant', cnvt(u'éléphant'))
-        self.assertEqual('\xe9l\xe9phant', cnvt(u'éléphant', encoding='latin1'))
+        self.assertEqual(u'éléphant', cnvt(u'éléphant'))
         # escaping
-        self.assertEqual('babar\\tceleste\\n', cnvt('babar\tceleste\n'))
-        self.assertEqual(r'C:\\new\tC:\\test', cnvt('C:\\new\tC:\\test'))
+        self.assertEqual(u'babar\\tceleste\\n', cnvt(u'babar\tceleste\n'))
+        self.assertEqual(u'C:\\\\new\\tC:\\\\test', cnvt(u'C:\\new\tC:\\test'))
 
     def test_convert_date(self):
         cnvt = pgstore._copyfrom_buffer_convert_date
@@ -73,12 +72,12 @@
                  DT.datetime(2014, 1, 1, 0, 0, 0)))
         results = pgstore._create_copyfrom_buffer(data)
         # all columns
-        expected = '''42\t42\t42.42\téléphant\t0666-01-13\t06:06:06.000000\t0666-06-13 06:06:06.000000
+        expected = u'''42\t42\t42.42\téléphant\t0666-01-13\t06:06:06.000000\t0666-06-13 06:06:06.000000
 6\t6\t6.6\tbabar\t2014-01-14\t04:02:01.000000\t2014-01-01 00:00:00.000000'''
         self.assertMultiLineEqual(expected, results.getvalue())
         # selected columns
         results = pgstore._create_copyfrom_buffer(data, columns=(1, 3, 6))
-        expected = '''42\téléphant\t0666-06-13 06:06:06.000000
+        expected = u'''42\téléphant\t0666-06-13 06:06:06.000000
 6\tbabar\t2014-01-01 00:00:00.000000'''
         self.assertMultiLineEqual(expected, results.getvalue())
 
@@ -88,8 +87,8 @@
                 dict(integer=6, double=6.6, text=u'babar',
                      date=DT.datetime(2014, 1, 1, 0, 0, 0)))
         results = pgstore._create_copyfrom_buffer(data, ('integer', 'text'))
-        expected = '''42\téléphant\n6\tbabar'''
-        self.assertMultiLineEqual(expected, results.getvalue())
+        expected = u'''42\téléphant\n6\tbabar'''
+        self.assertEqual(expected, results.getvalue())
 
 if __name__ == '__main__':
     unittest_main()