cubicweb/dataimport/csv.py
author Denis Laxalde <denis.laxalde@logilab.fr>
Thu, 09 Mar 2017 16:36:33 +0100
changeset 12053 c3c9f2e1424c
parent 11461 f5a4e14d1dd2
child 12504 362fdb399ff5
permissions -rw-r--r--
[pyramid] Add a "pyramid" instance configuration type In a new module 'cubicweb.pyramid.config' we define a "pyramid" instance configuration type. The noticeable feature of this configuration is that it manages a 'development.ini' file that gets installed in application home (along with `.conf` file). This file is templated and includes generated values for secrets of session and authtk tokens. This means that we can just call: pserve etc/cubicweb.d/<appname>/development.ini or gunicorn --paste etc/cubicweb.d/<appname>/development.ini -b :8080 just after instance creation to get a pyramid instance running without having to hack around a 'pyramid.ini' file. This patch drops 'development.ini' from skeleton and moves it in cubicweb/pyramid so that it gets installed at instance creation which is more appropriate than in cube creation. The new configuration class sets "cubicweb.bwcompat" setting to false so it is not intended to replace the "all-in-one" configuration type (which would require a bit more work). This configuration is close to the the 'repository' configuration type with just a couple of options from WebConfiguration that are needed for Pyramid (anonymous user/password plus some miscellaneous options that I'm not so sure are really needed). Note, in particular, that we do not pull CORS settings to be injected as a WSGI middleware like in wsgi_application_from_cwconfig() since I believe this should be left as an end-user responsibility and since this can be defined in a standard way in paste configuration. This configuration inherits from ServerConfiguration but registers the same appobjects as WebConfiguration. In cubicweb.web.request._CubicWebRequestBase, we guard against access to "uiprops" and "datadir_url" of the config because this new "pyramid" config does not have these (this does not make sense without bwcompat mode). At some point, we should either avoid using `cw_request`'s pyramid request attribute or make cubicweb's web request really independant of existing implementation and drop these assumptions.

# copyright 2003-2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of CubicWeb.
#
# CubicWeb is free software: you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
"""Functions to help importing CSV data"""
from __future__ import absolute_import, print_function

import codecs
import csv as csvmod
import warnings

from six import PY2, PY3, string_types

from logilab.common import shellutils


def count_lines(stream_or_filename):
    if isinstance(stream_or_filename, string_types):
        f = open(stream_or_filename)
    else:
        f = stream_or_filename
        f.seek(0)
    i = 0  # useful is f is an empty file
    for i, line in enumerate(f):
        pass
    f.seek(0)
    return i + 1


def ucsvreader_pb(stream_or_path, encoding='utf-8', delimiter=',', quotechar='"',
                  skipfirst=False, withpb=True, skip_empty=True, separator=None,
                  quote=None):
    """same as :func:`ucsvreader` but a progress bar is displayed as we iter on rows"""
    if separator is not None:
        delimiter = separator
        warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
    if quote is not None:
        quotechar = quote
        warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead")
    if isinstance(stream_or_path, string_types):
        stream = open(stream_or_path, 'rb')
    else:
        stream = stream_or_path
    rowcount = count_lines(stream)
    if skipfirst:
        rowcount -= 1
    if withpb:
        pb = shellutils.ProgressBar(rowcount, 50)
    for urow in ucsvreader(stream, encoding, delimiter, quotechar,
                           skipfirst=skipfirst, skip_empty=skip_empty):
        yield urow
        if withpb:
            pb.update()
    print(' %s rows imported' % rowcount)


def ucsvreader(stream, encoding='utf-8', delimiter=',', quotechar='"',
               skipfirst=False, ignore_errors=False, skip_empty=True,
               separator=None, quote=None):
    """A csv reader that accepts files with any encoding and outputs unicode
    strings

    if skip_empty (the default), lines without any values specified (only
    separators) will be skipped. This is useful for Excel exports which may be
    full of such lines.
    """
    if PY3:
        stream = codecs.getreader(encoding)(stream)
    if separator is not None:
        delimiter = separator
        warnings.warn("[3.20] 'separator' kwarg is deprecated, use 'delimiter' instead")
    if quote is not None:
        quotechar = quote
        warnings.warn("[3.20] 'quote' kwarg is deprecated, use 'quotechar' instead")
    it = iter(csvmod.reader(stream, delimiter=delimiter, quotechar=quotechar))
    if not ignore_errors:
        if skipfirst:
            next(it)
        for row in it:
            if PY2:
                decoded = [item.decode(encoding) for item in row]
            else:
                decoded = row
            if not skip_empty or any(decoded):
                yield decoded
    else:
        if skipfirst:
            try:
                row = next(it)
            except csvmod.Error:
                pass
        # Safe version, that can cope with error in CSV file
        while True:
            try:
                row = next(it)
            # End of CSV, break
            except StopIteration:
                break
            # Error in CSV, ignore line and continue
            except csvmod.Error:
                continue
            if PY2:
                decoded = [item.decode(encoding) for item in row]
            else:
                decoded = row
            if not skip_empty or any(decoded):
                yield decoded