diff -r 058bb3dc685f -r 0b59724cb3f2 dataimport/csv.py --- a/dataimport/csv.py Mon Jan 04 18:40:30 2016 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,120 +0,0 @@ -# copyright 2003-2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved. -# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr -# -# This file is part of CubicWeb. -# -# CubicWeb is free software: you can redistribute it and/or modify it under the -# terms of the GNU Lesser General Public License as published by the Free -# Software Foundation, either version 2.1 of the License, or (at your option) -# any later version. -# -# CubicWeb is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more -# details. -# -# You should have received a copy of the GNU Lesser General Public License along -# with CubicWeb. If not, see . -"""Functions to help importing CSV data""" -from __future__ import absolute_import, print_function - -import codecs -import csv as csvmod -import warnings -import os.path as osp - -from six import PY2, PY3, string_types - -from logilab.common import shellutils - - -def count_lines(stream_or_filename): - if isinstance(stream_or_filename, string_types): - f = open(stream_or_filename) - else: - f = stream_or_filename - f.seek(0) - for i, line in enumerate(f): - pass - f.seek(0) - return i+1 - - -def ucsvreader_pb(stream_or_path, encoding='utf-8', delimiter=',', quotechar='"', - skipfirst=False, withpb=True, skip_empty=True, separator=None, - quote=None): - """same as :func:`ucsvreader` but a progress bar is displayed as we iter on rows""" - if separator is not None: - delimiter = separator - warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead") - if quote is not None: - quotechar = quote - warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead") - if isinstance(stream_or_path, string_types): - stream = open(stream_or_path, 'rb') - else: - stream = stream_or_path - rowcount = count_lines(stream) - if skipfirst: - rowcount -= 1 - if withpb: - pb = shellutils.ProgressBar(rowcount, 50) - for urow in ucsvreader(stream, encoding, delimiter, quotechar, - skipfirst=skipfirst, skip_empty=skip_empty): - yield urow - if withpb: - pb.update() - print(' %s rows imported' % rowcount) - - -def ucsvreader(stream, encoding='utf-8', delimiter=',', quotechar='"', - skipfirst=False, ignore_errors=False, skip_empty=True, - separator=None, quote=None): - """A csv reader that accepts files with any encoding and outputs unicode - strings - - if skip_empty (the default), lines without any values specified (only - separators) will be skipped. This is useful for Excel exports which may be - full of such lines. - """ - if PY3: - stream = codecs.getreader(encoding)(stream) - if separator is not None: - delimiter = separator - warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead") - if quote is not None: - quotechar = quote - warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead") - it = iter(csvmod.reader(stream, delimiter=delimiter, quotechar=quotechar)) - if not ignore_errors: - if skipfirst: - next(it) - for row in it: - if PY2: - decoded = [item.decode(encoding) for item in row] - else: - decoded = row - if not skip_empty or any(decoded): - yield decoded - else: - if skipfirst: - try: - row = next(it) - except csvmod.Error: - pass - # Safe version, that can cope with error in CSV file - while True: - try: - row = next(it) - # End of CSV, break - except StopIteration: - break - # Error in CSV, ignore line and continue - except csvmod.Error: - continue - if PY2: - decoded = [item.decode(encoding) for item in row] - else: - decoded = row - if not skip_empty or any(decoded): - yield decoded