# copyright 2003-2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of CubicWeb.
#
# CubicWeb is free software: you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
"""Functions to help importing CSV data"""
from __future__ import absolute_import, print_function
import csv as csvmod
import warnings
import os.path as osp
from six import string_types
from logilab.common import shellutils
def count_lines(stream_or_filename):
    """Return the number of lines of a file or of an already opened stream.

    :param stream_or_filename: either a seekable stream that yields lines
      when iterated, or the path of a file to open. Any object without a
      ``seek`` method is treated as a path and handed to :func:`open`.
    :return: the number of lines, 0 when the input is empty.

    A stream given by the caller is rewound before and after counting and is
    left open, so it can be read again from the start. A file opened here is
    closed before returning.
    """
    if hasattr(stream_or_filename, 'seek'):
        stream = stream_or_filename
        stream.seek(0)
        # sum() over a generator yields 0 on empty input, where reading the
        # last index of an enumerate() loop would hit an unbound variable.
        count = sum(1 for _ in stream)
        # Rewind so the caller can consume the stream from the beginning.
        stream.seek(0)
        return count
    # We own this file handle: make sure it is released once counted.
    with open(stream_or_filename) as stream:
        return sum(1 for _ in stream)
def ucsvreader_pb(stream_or_path, encoding='utf-8', delimiter=',', quotechar='"',
                  skipfirst=False, withpb=True, skip_empty=True, separator=None,
                  quote=None):
    """Same as :func:`ucsvreader`, but optionally display a progress bar
    while iterating over the rows.

    :param stream_or_path: an opened stream, or the path of a file to open.
    :param encoding: forwarded to :func:`ucsvreader`.
    :param delimiter: field delimiter, forwarded to :func:`ucsvreader`.
    :param quotechar: quote character, forwarded to :func:`ucsvreader`.
    :param skipfirst: skip the first row; it is also removed from the total
      used by the progress bar.
    :param withpb: when true, create a progress bar and update it after each
      yielded row.
    :param skip_empty: forwarded to :func:`ucsvreader`.
    :param separator: deprecated alias of `delimiter`.
    :param quote: deprecated alias of `quotechar`.
    :raise Exception: if `stream_or_path` is a path that does not exist.

    A file opened here from a path is closed once iteration ends or the
    generator is closed. A stream given by the caller is left open.
    """
    if separator is not None:
        delimiter = separator
        warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
    if quote is not None:
        quotechar = quote
        warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead")
    opened_here = isinstance(stream_or_path, string_types)
    if opened_here:
        if not osp.exists(stream_or_path):
            raise Exception("file doesn't exists: %s" % stream_or_path)
        stream = open(stream_or_path)
    else:
        stream = stream_or_path
    try:
        rowcount = count_lines(stream)
        if skipfirst:
            # Never report a negative total when there is no line to skip.
            rowcount = max(rowcount - 1, 0)
        if withpb:
            # NOTE(review): 50 is presumably the bar width -- confirm against
            # logilab.common.shellutils.ProgressBar.
            pb = shellutils.ProgressBar(rowcount, 50)
        for urow in ucsvreader(stream, encoding, delimiter, quotechar,
                               skipfirst=skipfirst, skip_empty=skip_empty):
            yield urow
            if withpb:
                pb.update()
        print(' %s rows imported' % rowcount)
    finally:
        # Only release what we opened; this also runs when the consumer
        # abandons the generator before exhausting it.
        if opened_here:
            stream.close()
def ucsvreader(stream, encoding='utf-8', delimiter=',', quotechar='"',
               skipfirst=False, ignore_errors=False, skip_empty=True,
               separator=None, quote=None):
    """A csv reader that yields each row as a list of unicode strings.

    :param stream: iterable of lines handed to :func:`csv.reader`.
    :param encoding: encoding used to decode fields that the csv module
      returns as byte strings (the Python 2 case). Fields that are already
      text are yielded unchanged.
    :param delimiter: field delimiter.
    :param quotechar: quote character.
    :param skipfirst: skip the first row (usually a header). Nothing is
      raised if the stream is empty.
    :param ignore_errors: when true, rows on which the csv module raises
      :exc:`csv.Error` are silently dropped instead of aborting iteration.
    :param skip_empty: when true (the default), rows without any value (only
      separators) are skipped. This is useful for Excel exports, which may
      be full of such lines.
    :param separator: deprecated alias of `delimiter`.
    :param quote: deprecated alias of `quotechar`.
    :raise csv.Error: on a malformed row, unless `ignore_errors` is set.
    """
    if separator is not None:
        delimiter = separator
        warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
    if quote is not None:
        quotechar = quote
        warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead")

    def decode_row(row):
        # Only byte strings need decoding; text fields have no usable
        # ``decode`` and are passed through as-is.
        return [item.decode(encoding) if isinstance(item, bytes) else item
                for item in row]

    it = iter(csvmod.reader(stream, delimiter=delimiter, quotechar=quotechar))
    if skipfirst:
        try:
            # The default keeps an empty stream from leaking StopIteration
            # out of this generator.
            next(it, None)
        except csvmod.Error:
            if not ignore_errors:
                raise
    while True:
        try:
            row = next(it)
        except StopIteration:
            # End of CSV.
            break
        except csvmod.Error:
            if not ignore_errors:
                raise
            # Error in CSV: ignore the faulty row and carry on.
            continue
        decoded = decode_row(row)
        if not skip_empty or any(decoded):
            yield decoded