dataimport/csv.py
author Julien Cristau <julien.cristau@logilab.fr>
Tue, 28 Jul 2015 09:25:26 +0200
changeset 10553 1d824df4f2bd
parent 10513 7bec01a59f92
child 10589 7c23b7de2b8d
child 11404 98eebbe3de23
permissions -rw-r--r--
Fix (de)serialization of ComputedRelation read permissions For normal relation types, permissions don't need to be stored since they're just default values for the relation definitions. However, computed relations are serialized (as CWComputedRType), while their relation definitions are added at schema finalization time, and are only in memory. So add the 'read_permission' relation to CWComputedRType, and the appropriate hooks to save and restore those permissions. To avoid having to touch yams, we drop the 'add' and 'delete' permissions from the default computed relation permissions; this should probably be backported there. The actual permissions (set on the relation definitions) are hardcoded in finalize_computed_relations anyway. In deserialize_schema, the CWComputedRType handling needs to be delayed a little bit, until after we've called deserialize_ertype_permissions. The rql2sql test is adjusted because CWComputedRType has a 'name' attribute and the 'read_permission' relation, which generates ambiguity vs CWEType. We add an explicit CubicWebRelationSchema.check_permission_definitions, since we need to check both that computed and non-computed rtypes are defined properly. Based on report and initial patch from Christophe de Vienne (thanks!). Closes #5706307

# copyright 2003-2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of CubicWeb.
#
# CubicWeb is free software: you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
"""Functions to help importing CSV data"""

from __future__ import absolute_import

import csv as csvmod
import warnings
import os.path as osp

from logilab.common import shellutils


def count_lines(stream_or_filename):
    if isinstance(stream_or_filename, basestring):
        f = open(stream_or_filename)
    else:
        f = stream_or_filename
        f.seek(0)
    for i, line in enumerate(f):
        pass
    f.seek(0)
    return i+1


def ucsvreader_pb(stream_or_path, encoding='utf-8', delimiter=',', quotechar='"',
                  skipfirst=False, withpb=True, skip_empty=True, separator=None,
                  quote=None):
    """same as :func:`ucsvreader` but a progress bar is displayed as we iter on rows"""
    if separator is not None:
        delimiter = separator
        warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
    if quote is not None:
        quotechar = quote
        warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead")
    if isinstance(stream_or_path, basestring):
        if not osp.exists(stream_or_path):
            raise Exception("file doesn't exists: %s" % stream_or_path)
        stream = open(stream_or_path)
    else:
        stream = stream_or_path
    rowcount = count_lines(stream)
    if skipfirst:
        rowcount -= 1
    if withpb:
        pb = shellutils.ProgressBar(rowcount, 50)
    for urow in ucsvreader(stream, encoding, delimiter, quotechar,
                           skipfirst=skipfirst, skip_empty=skip_empty):
        yield urow
        if withpb:
            pb.update()
    print ' %s rows imported' % rowcount


def ucsvreader(stream, encoding='utf-8', delimiter=',', quotechar='"',
               skipfirst=False, ignore_errors=False, skip_empty=True,
               separator=None, quote=None):
    """A csv reader that accepts files with any encoding and outputs unicode
    strings

    if skip_empty (the default), lines without any values specified (only
    separators) will be skipped. This is useful for Excel exports which may be
    full of such lines.
    """
    if separator is not None:
        delimiter = separator
        warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
    if quote is not None:
        quotechar = quote
        warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead")
    it = iter(csvmod.reader(stream, delimiter=delimiter, quotechar=quotechar))
    if not ignore_errors:
        if skipfirst:
            it.next()
        for row in it:
            decoded = [item.decode(encoding) for item in row]
            if not skip_empty or any(decoded):
                yield decoded
    else:
        if skipfirst:
            try:
                row = it.next()
            except csvmod.Error:
                pass
        # Safe version, that can cope with error in CSV file
        while True:
            try:
                row = it.next()
            # End of CSV, break
            except StopIteration:
                break
            # Error in CSV, ignore line and continue
            except csvmod.Error:
                continue
            decoded = [item.decode(encoding) for item in row]
            if not skip_empty or any(decoded):
                yield decoded