cubicweb/dataimport/csv.py
author Denis Laxalde <denis.laxalde@logilab.fr>
Thu, 30 Nov 2017 11:55:35 +0100
changeset 12242 68ca7fe0ca29
parent 11461 f5a4e14d1dd2
child 12504 362fdb399ff5
permissions -rw-r--r--
[ssplanner] Prevent execution of write queries involving computed relations Previously, setting a computed relation upon entity creation or modification (using the ORM or an RQL query) would usually fail with an operational error in the backend ("no such table"). However, under some mysterious circumstances (like passing a string as value in cw_set for a computed relation), the RQL to SQL transformation would simply drop the clause. To prevent this from happening, we add a check for computed relations before adding a relation to an execution plan. This check raises a QueryError. It happens in several places: * in querier.InsertPlan.add_relation_def() (called from several places in ssplanner steps) for INSERT queries, * in ssplanner.UpdateStep.execute() for SET queries and, * in ssplanner.SSplanner.build_delete_plan() for DELETE queries. Tests added to unittest_querier.py because unittest_ssplanner.py looked inappropriate (it has only unit tests) and the former already had a NonRegressionTC class.

# copyright 2003-2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of CubicWeb.
#
# CubicWeb is free software: you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
"""Functions to help importing CSV data"""
from __future__ import absolute_import, print_function

import codecs
import csv as csvmod
import warnings

from six import PY2, PY3, string_types

from logilab.common import shellutils


def count_lines(stream_or_filename):
    """Return the number of lines found in a file or seekable stream.

    :param stream_or_filename: either a path (string), which is opened in
      binary mode and closed again before returning, or an already opened
      seekable stream, which is rewound before and after counting so the
      caller can read it from the start afterwards.
    :return: the number of lines; 0 when the input is empty.
    """
    if isinstance(stream_or_filename, string_types):
        # Binary mode: counting lines does not require decoding, so the
        # file's encoding cannot make this fail. The context manager ensures
        # the file we opened ourselves does not leak.
        with open(stream_or_filename, 'rb') as stream:
            return sum(1 for _ in stream)
    stream = stream_or_filename
    stream.seek(0)
    nlines = sum(1 for _ in stream)
    # leave the caller's stream ready to be read from the beginning
    stream.seek(0)
    return nlines


def ucsvreader_pb(stream_or_path, encoding='utf-8', delimiter=',', quotechar='"',
                  skipfirst=False, withpb=True, skip_empty=True, separator=None,
                  quote=None):
    """Same as :func:`ucsvreader` but a progress bar is displayed as we
    iterate on rows.

    :param stream_or_path: path of the CSV file (opened in binary mode and
      closed by this function) or an already opened seekable stream (left
      open, its lifetime is the caller's responsibility).
    :param encoding: encoding of the CSV data.
    :param delimiter: field delimiter.
    :param quotechar: quoting character.
    :param skipfirst: skip the first (header) row.
    :param withpb: display a progress bar while iterating.
    :param skip_empty: skip rows holding no value at all.
    :param separator: deprecated alias of `delimiter`.
    :param quote: deprecated alias of `quotechar`.

    The progress total and the final "rows imported" message are based on the
    number of lines of the input (minus the header when `skipfirst` is set),
    not on the number of rows actually yielded.
    """
    if separator is not None:
        delimiter = separator
        warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
    if quote is not None:
        quotechar = quote
        warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead")
    opened_here = isinstance(stream_or_path, string_types)
    if opened_here:
        stream = open(stream_or_path, 'rb')
    else:
        stream = stream_or_path
    try:
        rowcount = count_lines(stream)
        if skipfirst:
            # the header is not a data row; never go below zero
            rowcount = max(rowcount - 1, 0)
        if withpb:
            pb = shellutils.ProgressBar(rowcount, 50)
        for urow in ucsvreader(stream, encoding, delimiter, quotechar,
                               skipfirst=skipfirst, skip_empty=skip_empty):
            yield urow
            if withpb:
                pb.update()
    finally:
        # Only close what we opened ourselves. This also runs when the
        # generator is closed early or an error propagates.
        if opened_here:
            stream.close()
    print(' %s rows imported' % rowcount)


def ucsvreader(stream, encoding='utf-8', delimiter=',', quotechar='"',
               skipfirst=False, ignore_errors=False, skip_empty=True,
               separator=None, quote=None):
    """A csv reader that accepts files with any encoding and outputs unicode
    strings.

    :param stream: iterable of encoded lines, typically a file opened in
      binary mode.
    :param encoding: encoding used to decode the data.
    :param delimiter: field delimiter.
    :param quotechar: quoting character.
    :param skipfirst: skip the first (header) row; an empty input simply
      yields nothing.
    :param ignore_errors: if true, rows on which the csv module raises
      :exc:`csv.Error` are silently dropped instead of aborting the iteration.
    :param skip_empty: if true (the default), lines without any values
      specified (only separators) will be skipped. This is useful for Excel
      exports which may be full of such lines.
    :param separator: deprecated alias of `delimiter`.
    :param quote: deprecated alias of `quotechar`.
    :return: a generator of rows, each row being a list of unicode strings.
    """
    if PY3:
        # under Python 3 the csv module works on text: decode on the fly
        stream = codecs.getreader(encoding)(stream)
    if separator is not None:
        delimiter = separator
        warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
    if quote is not None:
        quotechar = quote
        warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead")
    it = iter(csvmod.reader(stream, delimiter=delimiter, quotechar=quotechar))
    if skipfirst:
        try:
            next(it)
        except StopIteration:
            # Empty input: nothing to skip and nothing to yield. Letting
            # StopIteration escape a generator is a RuntimeError since
            # PEP 479, so end the generator explicitly.
            return
        except csvmod.Error:
            if not ignore_errors:
                raise
    while True:
        try:
            row = next(it)
        except StopIteration:
            # End of CSV
            break
        except csvmod.Error:
            if not ignore_errors:
                raise
            # Error in CSV, ignore line and continue
            continue
        if PY2:
            # under Python 2 the csv module yields byte strings
            decoded = [item.decode(encoding) for item in row]
        else:
            decoded = row
        if not skip_empty or any(decoded):
            yield decoded