cubicweb: comparison dataimport/csv.py

equal deleted inserted replaced

-:99bdd4bddd77
+:7bec01a59f92
+# copyright 2003-2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
+#
+# This file is part of CubicWeb.
+#
+# CubicWeb is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 2.1 of the License, or (at your option)
+# any later version.
+#
+# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License along
+# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
+"""Functions to help importing CSV data"""
+from __future__ import absolute_import
+import csv as csvmod
+import warnings
+import os.path as osp
+from logilab.common import shellutils
def count_lines(stream_or_filename):
    """Return the number of lines in a file.

    :param stream_or_filename: either a path (string), which is opened,
      counted and closed again, or an already opened seekable stream.
    :return: the number of lines found, 0 for empty input.

    When a stream is given it is rewound to its beginning both before and
    after counting, so the caller can read it from the start afterwards.
    """
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str
    if isinstance(stream_or_filename, string_types):
        # We opened the file ourselves, so we are responsible for closing it.
        with open(stream_or_filename) as f:
            return sum(1 for _ in f)
    f = stream_or_filename
    f.seek(0)
    # Counting with sum() yields 0 on empty input instead of failing on an
    # unbound loop variable.
    count = sum(1 for _ in f)
    f.seek(0)
    return count
def ucsvreader_pb(stream_or_path, encoding='utf-8', delimiter=',', quotechar='"',
                  skipfirst=False, withpb=True, skip_empty=True, separator=None,
                  quote=None):
    """same as :func:`ucsvreader` but a progress bar is displayed as we iter on rows

    :param stream_or_path: a path (string) to an existing file, or an already
      opened stream; it is handed to :func:`count_lines` and then to
      :func:`ucsvreader`.
    :param encoding, delimiter, quotechar, skipfirst, skip_empty: forwarded to
      :func:`ucsvreader`.
    :param withpb: when true, a ``shellutils.ProgressBar`` is created and
      updated once per yielded row.
    :param separator: deprecated alias of `delimiter`.
    :param quote: deprecated alias of `quotechar`.
    :raise Exception: if `stream_or_path` is a path that does not exist.

    This is a generator: nothing (including the checks above) happens before
    the first row is requested. Once iteration is over, the line count
    computed up front is printed.
    """
    if separator is not None:
        delimiter = separator
        warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
    if quote is not None:
        quotechar = quote
        warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead")
    try:
        string_types = basestring  # Python 2: covers both str and unicode
    except NameError:
        string_types = str
    opened_here = isinstance(stream_or_path, string_types)
    if opened_here:
        if not osp.exists(stream_or_path):
            raise Exception("file doesn't exists: %s" % stream_or_path)
        stream = open(stream_or_path)
    else:
        stream = stream_or_path
    try:
        rowcount = count_lines(stream)
        if skipfirst:
            # never report a negative total when the input is empty
            rowcount = max(rowcount - 1, 0)
        if withpb:
            pb = shellutils.ProgressBar(rowcount, 50)
        for urow in ucsvreader(stream, encoding, delimiter, quotechar,
                               skipfirst=skipfirst, skip_empty=skip_empty):
            yield urow
            if withpb:
                pb.update()
        # Parenthesised single argument: same output with the Python 2 print
        # statement and the Python 3 print function.
        print(' %s rows imported' % rowcount)
    finally:
        # Only close what we opened; a stream given by the caller stays open.
        if opened_here:
            stream.close()
def ucsvreader(stream, encoding='utf-8', delimiter=',', quotechar='"',
               skipfirst=False, ignore_errors=False, skip_empty=True,
               separator=None, quote=None):
    """A csv reader that accepts files with any encoding and outputs unicode
    strings

    :param stream: the object handed to :func:`csv.reader` (a file or any
      iterable of lines).
    :param encoding: encoding used to decode byte-string cells; cells that the
      csv module already returns as text are left untouched.
    :param delimiter: field delimiter given to the csv reader.
    :param quotechar: quote character given to the csv reader.
    :param skipfirst: when true, the first row (usually a header) is dropped.
    :param ignore_errors: when true, rows on which the csv module raises
      ``csv.Error`` are silently dropped instead of aborting the iteration.
      Decoding errors are not covered by this flag.
    :param skip_empty: if true (the default), lines without any values
      specified (only separators) will be skipped. This is useful for Excel
      exports which may be full of such lines.
    :param separator: deprecated alias of `delimiter`.
    :param quote: deprecated alias of `quotechar`.
    :return: a generator yielding each row as a list of strings.
    """
    if separator is not None:
        delimiter = separator
        warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
    if quote is not None:
        quotechar = quote
        warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead")
    it = iter(csvmod.reader(stream, delimiter=delimiter, quotechar=quotechar))
    if skipfirst:
        try:
            next(it)
        except StopIteration:
            # Empty input: there is no header to skip and no row to yield.
            # StopIteration must not escape from a generator body (PEP 479).
            return
        except csvmod.Error:
            # A broken header line still counts as the skipped first row.
            if not ignore_errors:
                raise
    while True:
        try:
            row = next(it)
        except StopIteration:
            # End of CSV
            break
        except csvmod.Error:
            if not ignore_errors:
                raise
            # Error in CSV, ignore line and continue
            continue
        decoded = [item.decode(encoding) if isinstance(item, bytes) else item
                   for item in row]
        if not skip_empty or any(decoded):
            yield decoded

changeset 10513	7bec01a59f92
child 10589	7c23b7de2b8d
child 11404	98eebbe3de23