cubicweb/dataimport/csv.py
changeset 12567 26744ad37953
parent 12504 362fdb399ff5
equal deleted inserted replaced
12566:6b3523f81f42 12567:26744ad37953
    14 # details.
    14 # details.
    15 #
    15 #
    16 # You should have received a copy of the GNU Lesser General Public License along
    16 # You should have received a copy of the GNU Lesser General Public License along
    17 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
    17 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
    18 """Functions to help importing CSV data"""
    18 """Functions to help importing CSV data"""
    19 from __future__ import absolute_import, print_function
       
    20 
       
    21 import codecs
    19 import codecs
    22 import csv as csvmod
    20 import csv as csvmod
    23 
       
    24 from six import PY2, PY3, string_types
       
    25 
    21 
    26 from logilab.common import shellutils
    22 from logilab.common import shellutils
    27 
    23 
    28 
    24 
    29 def count_lines(stream_or_filename):
    25 def count_lines(stream_or_filename):
    30     if isinstance(stream_or_filename, string_types):
    26     if isinstance(stream_or_filename, str):
    31         f = open(stream_or_filename)
    27         f = open(stream_or_filename)
    32     else:
    28     else:
    33         f = stream_or_filename
    29         f = stream_or_filename
    34         f.seek(0)
    30         f.seek(0)
    35     i = 0  # useful if f is an empty file
    31     i = 0  # useful if f is an empty file
    40 
    36 
    41 
    37 
    42 def ucsvreader_pb(stream_or_path, encoding='utf-8', delimiter=',', quotechar='"',
    38 def ucsvreader_pb(stream_or_path, encoding='utf-8', delimiter=',', quotechar='"',
    43                   skipfirst=False, withpb=True, skip_empty=True):
    39                   skipfirst=False, withpb=True, skip_empty=True):
    44     """same as :func:`ucsvreader` but a progress bar is displayed as we iterate over rows"""
    40     """same as :func:`ucsvreader` but a progress bar is displayed as we iterate over rows"""
    45     if isinstance(stream_or_path, string_types):
    41     if isinstance(stream_or_path, str):
    46         stream = open(stream_or_path, 'rb')
    42         stream = open(stream_or_path, 'rb')
    47     else:
    43     else:
    48         stream = stream_or_path
    44         stream = stream_or_path
    49     rowcount = count_lines(stream)
    45     rowcount = count_lines(stream)
    50     if skipfirst:
    46     if skipfirst:
    66 
    62 
    67     if skip_empty (the default), lines without any values specified (only
    63     if skip_empty (the default), lines without any values specified (only
    68     separators) will be skipped. This is useful for Excel exports which may be
    64     separators) will be skipped. This is useful for Excel exports which may be
    69     full of such lines.
    65     full of such lines.
    70     """
    66     """
    71     if PY3:
    67     stream = codecs.getreader(encoding)(stream)
    72         stream = codecs.getreader(encoding)(stream)
       
    73     it = iter(csvmod.reader(stream, delimiter=delimiter, quotechar=quotechar))
    68     it = iter(csvmod.reader(stream, delimiter=delimiter, quotechar=quotechar))
    74     if not ignore_errors:
    69     if not ignore_errors:
    75         if skipfirst:
    70         if skipfirst:
    76             next(it)
    71             next(it)
    77         for row in it:
    72         for row in it:
    78             if PY2:
    73             if not skip_empty or any(row):
    79                 decoded = [item.decode(encoding) for item in row]
    74                 yield row
    80             else:
       
    81                 decoded = row
       
    82             if not skip_empty or any(decoded):
       
    83                 yield decoded
       
    84     else:
    75     else:
    85         if skipfirst:
    76         if skipfirst:
    86             try:
    77             try:
    87                 row = next(it)
    78                 row = next(it)
    88             except csvmod.Error:
    79             except csvmod.Error:
    95             except StopIteration:
    86             except StopIteration:
    96                 break
    87                 break
    97             # Error in CSV, ignore line and continue
    88             # Error in CSV, ignore line and continue
    98             except csvmod.Error:
    89             except csvmod.Error:
    99                 continue
    90                 continue
   100             if PY2:
    91             if not skip_empty or any(row):
   101                 decoded = [item.decode(encoding) for item in row]
    92                 yield row
   102             else:
       
   103                 decoded = row
       
   104             if not skip_empty or any(decoded):
       
   105                 yield decoded