dataimport/pgstore.py
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Tue, 21 Jun 2016 07:42:30 +0200
changeset 11371 2cc16363d6a3
parent 10986 ca8321b32392
child 10991 7ceb0971c694
permissions -rw-r--r--
backport 3.20 changes
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     1
# copyright 2003-2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     2
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     3
#
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     4
# This file is part of CubicWeb.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     5
#
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     6
# CubicWeb is free software: you can redistribute it and/or modify it under the
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     7
# terms of the GNU Lesser General Public License as published by the Free
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     8
# Software Foundation, either version 2.1 of the License, or (at your option)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     9
# any later version.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    10
#
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    11
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    12
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    13
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    14
# details.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    15
#
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    16
# You should have received a copy of the GNU Lesser General Public License along
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    17
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    18
"""Postgres specific store"""
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    19
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    20
import warnings
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    21
import cPickle
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    22
import os.path as osp
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    23
from StringIO import StringIO
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    24
from time import asctime
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    25
from datetime import date, datetime, time
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    26
from collections import defaultdict
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    27
from base64 import b64encode
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    28
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    29
from cubicweb.utils import make_uid
10982
20bf21bb16e4 [dataimport] add missing import 'eschema_eid'
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents: 10978
diff changeset
    30
from cubicweb.server.utils import eschema_eid
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    31
from cubicweb.server.sqlutils import SQL_PREFIX
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    32
from cubicweb.dataimport.stores import NoHookRQLObjectStore
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    33
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    34
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    35
def _execmany_thread_not_copy_from(cu, statement, data, table=None,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    36
                                   columns=None, encoding='utf-8'):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    37
    """ Execute thread without copy from
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    38
    """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    39
    cu.executemany(statement, data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    40
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    41
def _execmany_thread_copy_from(cu, statement, data, table,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    42
                               columns, encoding='utf-8'):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    43
    """ Execute thread with copy from
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    44
    """
10978
6f88cb7b7a84 merge with 3.20.12
Rémi Cardona <remi.cardona@logilab.fr>
parents: 10513
diff changeset
    45
    try:
6f88cb7b7a84 merge with 3.20.12
Rémi Cardona <remi.cardona@logilab.fr>
parents: 10513
diff changeset
    46
        buf = _create_copyfrom_buffer(data, columns, encoding=encoding)
6f88cb7b7a84 merge with 3.20.12
Rémi Cardona <remi.cardona@logilab.fr>
parents: 10513
diff changeset
    47
    except ValueError:
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    48
        _execmany_thread_not_copy_from(cu, statement, data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    49
    else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    50
        if columns is None:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    51
            cu.copy_from(buf, table, null='NULL')
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    52
        else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    53
            cu.copy_from(buf, table, null='NULL', columns=columns)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    54
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    55
def _execmany_thread(sql_connect, statements, dump_output_dir=None,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    56
                     support_copy_from=True, encoding='utf-8'):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    57
    """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    58
    Execute sql statement. If 'INSERT INTO', try to use 'COPY FROM' command,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    59
    or fallback to execute_many.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    60
    """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    61
    if support_copy_from:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    62
        execmany_func = _execmany_thread_copy_from
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    63
    else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    64
        execmany_func = _execmany_thread_not_copy_from
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    65
    cnx = sql_connect()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    66
    cu = cnx.cursor()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    67
    try:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    68
        for statement, data in statements:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    69
            table = None
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    70
            columns = None
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    71
            try:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    72
                if not statement.startswith('INSERT INTO'):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    73
                    cu.executemany(statement, data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    74
                    continue
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    75
                table = statement.split()[2]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    76
                if isinstance(data[0], (tuple, list)):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    77
                    columns = None
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    78
                else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    79
                    columns = list(data[0])
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    80
                execmany_func(cu, statement, data, table, columns, encoding)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    81
            except Exception:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    82
                print 'unable to copy data into table %s' % table
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    83
                # Error in import statement, save data in dump_output_dir
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    84
                if dump_output_dir is not None:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    85
                    pdata = {'data': data, 'statement': statement,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    86
                             'time': asctime(), 'columns': columns}
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    87
                    filename = make_uid()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    88
                    try:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    89
                        with open(osp.join(dump_output_dir,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    90
                                           '%s.pickle' % filename), 'w') as fobj:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    91
                            fobj.write(cPickle.dumps(pdata))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    92
                    except IOError:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    93
                        print 'ERROR while pickling in', dump_output_dir, filename+'.pickle'
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    94
                        pass
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    95
                cnx.rollback()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    96
                raise
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    97
    finally:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    98
        cnx.commit()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    99
        cu.close()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   100
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   101
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   102
def _copyfrom_buffer_convert_None(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   103
    '''Convert None value to "NULL"'''
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   104
    return 'NULL'
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   105
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   106
def _copyfrom_buffer_convert_number(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   107
    '''Convert a number into its string representation'''
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   108
    return str(value)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   109
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   110
def _copyfrom_buffer_convert_string(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   111
    '''Convert string value.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   112
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   113
    Recognized keywords:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   114
    :encoding: resulting string encoding (default: utf-8)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   115
    '''
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   116
    encoding = opts.get('encoding','utf-8')
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   117
    escape_chars = ((u'\\', ur'\\'), (u'\t', u'\\t'), (u'\r', u'\\r'),
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   118
                    (u'\n', u'\\n'))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   119
    for char, replace in escape_chars:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   120
        value = value.replace(char, replace)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   121
    if isinstance(value, unicode):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   122
        value = value.encode(encoding)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   123
    return value
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   124
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   125
def _copyfrom_buffer_convert_date(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   126
    '''Convert date into "YYYY-MM-DD"'''
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   127
    # Do not use strftime, as it yields issue with date < 1900
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   128
    # (http://bugs.python.org/issue1777412)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   129
    return '%04d-%02d-%02d' % (value.year, value.month, value.day)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   130
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   131
def _copyfrom_buffer_convert_datetime(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   132
    '''Convert date into "YYYY-MM-DD HH:MM:SS.UUUUUU"'''
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   133
    # Do not use strftime, as it yields issue with date < 1900
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   134
    # (http://bugs.python.org/issue1777412)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   135
    return '%s %s' % (_copyfrom_buffer_convert_date(value, **opts),
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   136
                      _copyfrom_buffer_convert_time(value, **opts))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   137
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   138
def _copyfrom_buffer_convert_time(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   139
    '''Convert time into "HH:MM:SS.UUUUUU"'''
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   140
    return '%02d:%02d:%02d.%06d' % (value.hour, value.minute,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   141
                                    value.second, value.microsecond)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   142
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   143
# (types, converter) list.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   144
_COPYFROM_BUFFER_CONVERTERS = [
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   145
    (type(None), _copyfrom_buffer_convert_None),
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   146
    ((long, int, float), _copyfrom_buffer_convert_number),
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   147
    (basestring, _copyfrom_buffer_convert_string),
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   148
    (datetime, _copyfrom_buffer_convert_datetime),
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   149
    (date, _copyfrom_buffer_convert_date),
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   150
    (time, _copyfrom_buffer_convert_time),
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   151
]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   152
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   153
def _create_copyfrom_buffer(data, columns=None, **convert_opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   154
    """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   155
    Create a StringIO buffer for 'COPY FROM' command.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   156
    Deals with Unicode, Int, Float, Date... (see ``converters``)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   157
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   158
    :data: a sequence/dict of tuples
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   159
    :columns: list of columns to consider (default to all columns)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   160
    :converter_opts: keyword arguements given to converters
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   161
    """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   162
    # Create a list rather than directly create a StringIO
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   163
    # to correctly write lines separated by '\n' in a single step
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   164
    rows = []
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   165
    if columns is None:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   166
        if isinstance(data[0], (tuple, list)):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   167
            columns = range(len(data[0]))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   168
        elif isinstance(data[0], dict):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   169
            columns = data[0].keys()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   170
        else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   171
            raise ValueError('Could not get columns: you must provide columns.')
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   172
    for row in data:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   173
        # Iterate over the different columns and the different values
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   174
        # and try to convert them to a correct datatype.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   175
        # If an error is raised, do not continue.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   176
        formatted_row = []
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   177
        for col in columns:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   178
            try:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   179
                value = row[col]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   180
            except KeyError:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   181
                warnings.warn(u"Column %s is not accessible in row %s"
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   182
                              % (col, row), RuntimeWarning)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   183
                # XXX 'value' set to None so that the import does not end in
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   184
                # error.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   185
                # Instead, the extra keys are set to NULL from the
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   186
                # database point of view.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   187
                value = None
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   188
            for types, converter in _COPYFROM_BUFFER_CONVERTERS:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   189
                if isinstance(value, types):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   190
                    value = converter(value, **convert_opts)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   191
                    break
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   192
            else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   193
                raise ValueError("Unsupported value type %s" % type(value))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   194
            # We push the value to the new formatted row
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   195
            # if the value is not None and could be converted to a string.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   196
            formatted_row.append(value)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   197
        rows.append('\t'.join(formatted_row))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   198
    return StringIO('\n'.join(rows))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   199
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   200
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   201
class SQLGenObjectStore(NoHookRQLObjectStore):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   202
    """Controller of the data import process. This version is based
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   203
    on direct insertions throught SQL command (COPY FROM or execute many).
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   204
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   205
    >>> store = SQLGenObjectStore(cnx)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   206
    >>> store.create_entity('Person', ...)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   207
    >>> store.flush()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   208
    """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   209
10985
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   210
    def __init__(self, cnx, dump_output_dir=None, nb_threads_statement=1):
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   211
        """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   212
        Initialize a SQLGenObjectStore.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   213
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   214
        Parameters:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   215
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   216
          - cnx: connection on the cubicweb instance
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   217
          - dump_output_dir: a directory to dump failed statements
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   218
            for easier recovery. Default is None (no dump).
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   219
        """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   220
        super(SQLGenObjectStore, self).__init__(cnx)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   221
        ### hijack default source
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   222
        self.source = SQLGenSourceWrapper(
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   223
            self.source, cnx.vreg.schema,
10985
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   224
            dump_output_dir=dump_output_dir)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   225
        ### XXX This is done in super().__init__(), but should be
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   226
        ### redone here to link to the correct source
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   227
        self.add_relation = self.source.add_relation
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   228
        self.indexes_etypes = {}
10985
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   229
        if nb_threads_statement != 1:
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   230
            warn('[3.21] SQLGenObjectStore is no longer threaded', DeprecationWarning)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   231
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   232
    def flush(self):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   233
        """Flush data to the database"""
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   234
        self.source.flush()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   235
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   236
    def relate(self, subj_eid, rtype, obj_eid, **kwargs):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   237
        if subj_eid is None or obj_eid is None:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   238
            return
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   239
        # XXX Could subjtype be inferred ?
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   240
        self.source.add_relation(self._cnx, subj_eid, rtype, obj_eid,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   241
                                 self.rschema(rtype).inlined, **kwargs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   242
        if self.rschema(rtype).symmetric:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   243
            self.source.add_relation(self._cnx, obj_eid, rtype, subj_eid,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   244
                                     self.rschema(rtype).inlined, **kwargs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   245
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   246
    def drop_indexes(self, etype):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   247
        """Drop indexes for a given entity type"""
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   248
        if etype not in self.indexes_etypes:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   249
            cu = self._cnx.cnxset.cu
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   250
            def index_to_attr(index):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   251
                """turn an index name to (database) attribute name"""
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   252
                return index.replace(etype.lower(), '').replace('idx', '').strip('_')
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   253
            indices = [(index, index_to_attr(index))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   254
                       for index in self.source.dbhelper.list_indices(cu, etype)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   255
                       # Do not consider 'cw_etype_pkey' index
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   256
                       if not index.endswith('key')]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   257
            self.indexes_etypes[etype] = indices
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   258
        for index, attr in self.indexes_etypes[etype]:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   259
            self._cnx.system_sql('DROP INDEX %s' % index)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   260
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   261
    def create_indexes(self, etype):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   262
        """Recreate indexes for a given entity type"""
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   263
        for index, attr in self.indexes_etypes.get(etype, []):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   264
            sql = 'CREATE INDEX %s ON cw_%s(%s)' % (index, etype, attr)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   265
            self._cnx.system_sql(sql)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   266
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   267
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   268
###########################################################################
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   269
## SQL Source #############################################################
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   270
###########################################################################
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   271
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   272
class SQLGenSourceWrapper(object):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   273
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   274
    def __init__(self, system_source, schema,
10985
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   275
                 dump_output_dir=None):
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   276
        self.system_source = system_source
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   277
        # Explicitely backport attributes from system source
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   278
        self._storage_handler = self.system_source._storage_handler
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   279
        self.preprocess_entity = self.system_source.preprocess_entity
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   280
        self.sqlgen = self.system_source.sqlgen
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   281
        self.uri = self.system_source.uri
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   282
        self.eid = self.system_source.eid
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   283
        # Directory to write temporary files
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   284
        self.dump_output_dir = dump_output_dir
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   285
        # Allow to execute code with SQLite backend that does
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   286
        # not support (yet...) copy_from
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   287
        # XXX Should be dealt with in logilab.database
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   288
        spcfrom = system_source.dbhelper.dbapi_module.support_copy_from
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   289
        self.support_copy_from = spcfrom
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   290
        self.dbencoding = system_source.dbhelper.dbencoding
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   291
        self.init_statement_lists()
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   292
        self._inlined_rtypes_cache = {}
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   293
        self._fill_inlined_rtypes_cache(schema)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   294
        self.schema = schema
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   295
        self.do_fti = False
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   296
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   297
    def _fill_inlined_rtypes_cache(self, schema):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   298
        cache = self._inlined_rtypes_cache
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   299
        for eschema in schema.entities():
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   300
            for rschema in eschema.ordered_relations():
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   301
                if rschema.inlined:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   302
                    cache[eschema.type] = SQL_PREFIX + rschema.type
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   303
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   304
    def init_statement_lists(self):
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   305
        self._sql_entities = defaultdict(list)
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   306
        self._sql_relations = {}
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   307
        self._sql_inlined_relations = {}
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   308
        self._sql_eids = defaultdict(list)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   309
        # keep track, for each eid of the corresponding data dict
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   310
        self._sql_eid_insertdicts = {}
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   311
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   312
    def flush(self):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   313
        print 'starting flush'
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   314
        _entities_sql = self._sql_entities
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   315
        _relations_sql = self._sql_relations
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   316
        _inlined_relations_sql = self._sql_inlined_relations
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   317
        _insertdicts = self._sql_eid_insertdicts
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   318
        try:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   319
            # try, for each inlined_relation, to find if we're also creating
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   320
            # the host entity (i.e. the subject of the relation).
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   321
            # In that case, simply update the insert dict and remove
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   322
            # the need to make the
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   323
            # UPDATE statement
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   324
            for statement, datalist in _inlined_relations_sql.iteritems():
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   325
                new_datalist = []
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   326
                # for a given inlined relation,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   327
                # browse each couple to be inserted
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   328
                for data in datalist:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   329
                    keys = list(data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   330
                    # For inlined relations, it exists only two case:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   331
                    # (rtype, cw_eid) or (cw_eid, rtype)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   332
                    if keys[0] == 'cw_eid':
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   333
                        rtype = keys[1]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   334
                    else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   335
                        rtype = keys[0]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   336
                    updated_eid = data['cw_eid']
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   337
                    if updated_eid in _insertdicts:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   338
                        _insertdicts[updated_eid][rtype] = data[rtype]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   339
                    else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   340
                        # could not find corresponding insert dict, keep the
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   341
                        # UPDATE query
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   342
                        new_datalist.append(data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   343
                _inlined_relations_sql[statement] = new_datalist
10985
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   344
            _execmany_thread(self.system_source.get_connection,
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   345
                             self._sql_eids.items()
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   346
                             + _entities_sql.items()
10985
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   347
                             + _relations_sql.items()
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   348
                             + _inlined_relations_sql.items(),
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   349
                             dump_output_dir=self.dump_output_dir,
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   350
                             support_copy_from=self.support_copy_from,
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   351
                             encoding=self.dbencoding)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   352
        finally:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   353
            _entities_sql.clear()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   354
            _relations_sql.clear()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   355
            _insertdicts.clear()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   356
            _inlined_relations_sql.clear()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   357
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   358
    def add_relation(self, cnx, subject, rtype, object,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   359
                     inlined=False, **kwargs):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   360
        if inlined:
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   361
            _sql = self._sql_inlined_relations
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   362
            data = {'cw_eid': subject, SQL_PREFIX + rtype: object}
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   363
            subjtype = kwargs.get('subjtype')
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   364
            if subjtype is None:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   365
                # Try to infer it
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   366
                targets = [t.type for t in
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   367
                           self.schema.rschema(rtype).subjects()]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   368
                if len(targets) == 1:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   369
                    subjtype = targets[0]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   370
                else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   371
                    raise ValueError('You should give the subject etype for '
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   372
                                     'inlined relation %s'
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   373
                                     ', as it cannot be inferred: '
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   374
                                     'this type is given as keyword argument '
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   375
                                     '``subjtype``'% rtype)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   376
            statement = self.sqlgen.update(SQL_PREFIX + subjtype,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   377
                                           data, ['cw_eid'])
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   378
        else:
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   379
            _sql = self._sql_relations
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   380
            data = {'eid_from': subject, 'eid_to': object}
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   381
            statement = self.sqlgen.insert('%s_relation' % rtype, data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   382
        if statement in _sql:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   383
            _sql[statement].append(data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   384
        else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   385
            _sql[statement] = [data]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   386
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   387
    def add_entity(self, cnx, entity):
10983
da8168612e61 [dataimport] Fix method signature
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents: 10982
diff changeset
   388
        with self._storage_handler(cnx, entity, 'added'):
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   389
            attrs = self.preprocess_entity(entity)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   390
            rtypes = self._inlined_rtypes_cache.get(entity.cw_etype, ())
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   391
            if isinstance(rtypes, str):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   392
                rtypes = (rtypes,)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   393
            for rtype in rtypes:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   394
                if rtype not in attrs:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   395
                    attrs[rtype] = None
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   396
            sql = self.sqlgen.insert(SQL_PREFIX + entity.cw_etype, attrs)
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   397
            self._sql_eid_insertdicts[entity.eid] = attrs
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   398
            self._append_to_entities(sql, attrs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   399
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   400
    def _append_to_entities(self, sql, attrs):
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   401
        self._sql_entities[sql].append(attrs)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   402
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   403
    def _handle_insert_entity_sql(self, cnx, sql, attrs):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   404
        # We have to overwrite the source given in parameters
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   405
        # as here, we directly use the system source
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   406
        attrs['asource'] = self.system_source.uri
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   407
        self._sql_eids[sql].append(attrs)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   408
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   409
    def _handle_is_relation_sql(self, cnx, sql, attrs):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   410
        self._append_to_entities(sql, attrs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   411
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   412
    def _handle_is_instance_of_sql(self, cnx, sql, attrs):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   413
        self._append_to_entities(sql, attrs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   414
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   415
    def _handle_source_relation_sql(self, cnx, sql, attrs):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   416
        self._append_to_entities(sql, attrs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   417
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   418
    # add_info is _copypasted_ from the one in NativeSQLSource. We want it
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   419
    # there because it will use the _handlers of the SQLGenSourceWrapper, which
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   420
    # are not like the ones in the native source.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   421
    def add_info(self, cnx, entity, source, extid):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   422
        """add type and source info for an eid into the system table"""
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   423
        # begin by inserting eid/type/source/extid into the entities table
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   424
        if extid is not None:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   425
            assert isinstance(extid, str)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   426
            extid = b64encode(extid)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   427
        attrs = {'type': entity.cw_etype, 'eid': entity.eid, 'extid': extid,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   428
                 'asource': source.uri}
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   429
        self._handle_insert_entity_sql(cnx, self.sqlgen.insert('entities', attrs), attrs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   430
        # insert core relations: is, is_instance_of and cw_source
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   431
        try:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   432
            self._handle_is_relation_sql(cnx, 'INSERT INTO is_relation(eid_from,eid_to) VALUES (%s,%s)',
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   433
                                         (entity.eid, eschema_eid(cnx, entity.e_schema)))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   434
        except IndexError:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   435
            # during schema serialization, skip
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   436
            pass
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   437
        else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   438
            for eschema in entity.e_schema.ancestors() + [entity.e_schema]:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   439
                self._handle_is_relation_sql(cnx,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   440
                                             'INSERT INTO is_instance_of_relation(eid_from,eid_to) VALUES (%s,%s)',
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   441
                                             (entity.eid, eschema_eid(cnx, eschema)))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   442
        if 'CWSource' in self.schema and source.eid is not None: # else, cw < 3.10
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   443
            self._handle_is_relation_sql(cnx, 'INSERT INTO cw_source_relation(eid_from,eid_to) VALUES (%s,%s)',
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   444
                                         (entity.eid, source.eid))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   445
        # now we can update the full text index
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   446
        if self.do_fti and self.need_fti_indexation(entity.cw_etype):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   447
            self.index_entity(cnx, entity=entity)