[dataimport] fix ucsvreader for python3
author Julien Cristau <julien.cristau@logilab.fr>
Fri, 16 Oct 2015 17:06:28 +0200
changeset 10807 bb0c7dbd1fe7
parent 10806 bd98cd3b7869
child 10808 b6b2e1bcd1b2
[dataimport] fix ucsvreader for python3: the csv module in python3 wants str, so use a StreamReader to feed csv.reader and don't decode the output.
dataimport/csv.py
dataimport/test/unittest_importer.py
--- a/dataimport/csv.py	Wed Oct 14 16:30:45 2015 +0200
+++ b/dataimport/csv.py	Fri Oct 16 17:06:28 2015 +0200
@@ -18,11 +18,12 @@
 """Functions to help importing CSV data"""
 from __future__ import absolute_import, print_function
 
+import codecs
 import csv as csvmod
 import warnings
 import os.path as osp
 
-from six import string_types
+from six import PY2, PY3, string_types
 
 from logilab.common import shellutils
 
@@ -50,9 +51,7 @@
         quotechar = quote
         warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead")
     if isinstance(stream_or_path, string_types):
-        if not osp.exists(stream_or_path):
-            raise Exception("file doesn't exists: %s" % stream_or_path)
-        stream = open(stream_or_path)
+        stream = open(stream_or_path, 'rb')
     else:
         stream = stream_or_path
     rowcount = count_lines(stream)
@@ -78,6 +77,8 @@
     separators) will be skipped. This is useful for Excel exports which may be
     full of such lines.
     """
+    if PY3:
+        stream = codecs.getreader(encoding)(stream)
     if separator is not None:
         delimiter = separator
         warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
@@ -89,7 +90,10 @@
         if skipfirst:
             next(it)
         for row in it:
-            decoded = [item.decode(encoding) for item in row]
+            if PY2:
+                decoded = [item.decode(encoding) for item in row]
+            else:
+                decoded = row
             if not skip_empty or any(decoded):
                 yield decoded
     else:
@@ -108,7 +112,10 @@
             # Error in CSV, ignore line and continue
             except csvmod.Error:
                 continue
-            decoded = [item.decode(encoding) for item in row]
+            if PY2:
+                decoded = [item.decode(encoding) for item in row]
+            else:
+                decoded = row
             if not skip_empty or any(decoded):
                 yield decoded
 
--- a/dataimport/test/unittest_importer.py	Wed Oct 14 16:30:45 2015 +0200
+++ b/dataimport/test/unittest_importer.py	Fri Oct 16 17:06:28 2015 +0200
@@ -167,7 +167,7 @@
 
 def extentities_from_csv(fpath):
     """Yield ExtEntity read from `fpath` CSV file."""
-    with open(fpath) as f:
+    with open(fpath, 'rb') as f:
         for uri, name, knows in ucsvreader(f, skipfirst=True, skip_empty=False):
             yield ExtEntity('Personne', uri,
                             {'nom': set([name]), 'connait': set([knows])})