dataimport/csv.py
changeset 10807 bb0c7dbd1fe7
parent 10669 155c29e0ed1c
child 10907 9ae707db5265
equal deleted inserted replaced
10806:bd98cd3b7869 10807:bb0c7dbd1fe7
    16 # You should have received a copy of the GNU Lesser General Public License along
    16 # You should have received a copy of the GNU Lesser General Public License along
    17 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
    17 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
    18 """Functions to help importing CSV data"""
    18 """Functions to help importing CSV data"""
    19 from __future__ import absolute_import, print_function
    19 from __future__ import absolute_import, print_function
    20 
    20 
       
    21 import codecs
    21 import csv as csvmod
    22 import csv as csvmod
    22 import warnings
    23 import warnings
    23 import os.path as osp
    24 import os.path as osp
    24 
    25 
    25 from six import string_types
    26 from six import PY2, PY3, string_types
    26 
    27 
    27 from logilab.common import shellutils
    28 from logilab.common import shellutils
    28 
    29 
    29 
    30 
    30 def count_lines(stream_or_filename):
    31 def count_lines(stream_or_filename):
    48         warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
    49         warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
    49     if quote is not None:
    50     if quote is not None:
    50         quotechar = quote
    51         quotechar = quote
    51         warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead")
    52         warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead")
    52     if isinstance(stream_or_path, string_types):
    53     if isinstance(stream_or_path, string_types):
    53         if not osp.exists(stream_or_path):
    54         stream = open(stream_or_path, 'rb')
    54             raise Exception("file doesn't exists: %s" % stream_or_path)
       
    55         stream = open(stream_or_path)
       
    56     else:
    55     else:
    57         stream = stream_or_path
    56         stream = stream_or_path
    58     rowcount = count_lines(stream)
    57     rowcount = count_lines(stream)
    59     if skipfirst:
    58     if skipfirst:
    60         rowcount -= 1
    59         rowcount -= 1
    76 
    75 
    77     if skip_empty (the default), lines without any values specified (only
    76     if skip_empty (the default), lines without any values specified (only
    78     separators) will be skipped. This is useful for Excel exports which may be
    77     separators) will be skipped. This is useful for Excel exports which may be
    79     full of such lines.
    78     full of such lines.
    80     """
    79     """
       
    80     if PY3:
       
    81         stream = codecs.getreader(encoding)(stream)
    81     if separator is not None:
    82     if separator is not None:
    82         delimiter = separator
    83         delimiter = separator
    83         warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
    84         warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
    84     if quote is not None:
    85     if quote is not None:
    85         quotechar = quote
    86         quotechar = quote
    87     it = iter(csvmod.reader(stream, delimiter=delimiter, quotechar=quotechar))
    88     it = iter(csvmod.reader(stream, delimiter=delimiter, quotechar=quotechar))
    88     if not ignore_errors:
    89     if not ignore_errors:
    89         if skipfirst:
    90         if skipfirst:
    90             next(it)
    91             next(it)
    91         for row in it:
    92         for row in it:
    92             decoded = [item.decode(encoding) for item in row]
    93             if PY2:
       
    94                 decoded = [item.decode(encoding) for item in row]
       
    95             else:
       
    96                 decoded = row
    93             if not skip_empty or any(decoded):
    97             if not skip_empty or any(decoded):
    94                 yield decoded
    98                 yield decoded
    95     else:
    99     else:
    96         if skipfirst:
   100         if skipfirst:
    97             try:
   101             try:
   106             except StopIteration:
   110             except StopIteration:
   107                 break
   111                 break
   108             # Error in CSV, ignore line and continue
   112             # Error in CSV, ignore line and continue
   109             except csvmod.Error:
   113             except csvmod.Error:
   110                 continue
   114                 continue
   111             decoded = [item.decode(encoding) for item in row]
   115             if PY2:
       
   116                 decoded = [item.decode(encoding) for item in row]
       
   117             else:
       
   118                 decoded = row
   112             if not skip_empty or any(decoded):
   119             if not skip_empty or any(decoded):
   113                 yield decoded
   120                 yield decoded
   114 
   121