cubicweb/dataimport/pgstore.py
author Julien Cristau <julien.cristau@logilab.fr>
Mon, 08 Feb 2016 15:37:52 +0100
changeset 11157 42fa15632493
parent 11140 fabcd1c6dcd1
child 11307 74f5814ecdf0
permissions -rw-r--r--
[web/test] return the hash of uploaded files in FileUploadTC instead of their contents json must be unicode, which doesn't go well with arbitrary file contents. Compute the file hash instead, as we know the hexdigest is (ascii) text.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     1
# copyright 2003-2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     2
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     3
#
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     4
# This file is part of CubicWeb.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     5
#
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     6
# CubicWeb is free software: you can redistribute it and/or modify it under the
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     7
# terms of the GNU Lesser General Public License as published by the Free
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     8
# Software Foundation, either version 2.1 of the License, or (at your option)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     9
# any later version.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    10
#
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    11
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    12
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    13
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    14
# details.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    15
#
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    16
# You should have received a copy of the GNU Lesser General Public License along
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    17
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    18
"""Postgres specific store"""
10589
7c23b7de2b8d [py3k] print function
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents: 10513
diff changeset
    19
from __future__ import print_function
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    20
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    21
import warnings
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    22
import os.path as osp
10810
0768bf2333a7 [dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents: 10662
diff changeset
    23
from io import StringIO
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    24
from time import asctime
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    25
from datetime import date, datetime, time
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    26
from collections import defaultdict
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    27
from base64 import b64encode
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    28
11010
09be4709c8c0 [dataimport] fix SQLGenObjectStore's add_info for python3
Julien Cristau <julien.cristau@logilab.fr>
parents: 11009
diff changeset
    29
from six import string_types, integer_types, text_type, binary_type
10609
e2d8e81bfe68 [py3k] import range using six.moves
Rémi Cardona <remi.cardona@logilab.fr>
parents: 10602
diff changeset
    30
from six.moves import cPickle as pickle, range
10602
4845012cfc8e [py3k] import 'pickle' using six.moves
Rémi Cardona <remi.cardona@logilab.fr>
parents: 10598
diff changeset
    31
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    32
from cubicweb.utils import make_uid
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    33
from cubicweb.server.sqlutils import SQL_PREFIX
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    34
from cubicweb.dataimport.stores import NoHookRQLObjectStore
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    35
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    36
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    37
def _execmany_thread_not_copy_from(cu, statement, data, table=None,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    38
                                   columns=None, encoding='utf-8'):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    39
    """ Execute thread without copy from
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    40
    """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    41
    cu.executemany(statement, data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    42
11140
fabcd1c6dcd1 [dataimport] cleanup a bit nohook store, mostly protecting some attributes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11129
diff changeset
    43
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    44
def _execmany_thread_copy_from(cu, statement, data, table,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    45
                               columns, encoding='utf-8'):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    46
    """ Execute thread with copy from
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    47
    """
10978
6f88cb7b7a84 merge with 3.20.12
Rémi Cardona <remi.cardona@logilab.fr>
parents: 10513
diff changeset
    48
    try:
6f88cb7b7a84 merge with 3.20.12
Rémi Cardona <remi.cardona@logilab.fr>
parents: 10513
diff changeset
    49
        buf = _create_copyfrom_buffer(data, columns, encoding=encoding)
6f88cb7b7a84 merge with 3.20.12
Rémi Cardona <remi.cardona@logilab.fr>
parents: 10513
diff changeset
    50
    except ValueError:
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    51
        _execmany_thread_not_copy_from(cu, statement, data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    52
    else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    53
        if columns is None:
10810
0768bf2333a7 [dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents: 10662
diff changeset
    54
            cu.copy_from(buf, table, null=u'NULL')
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    55
        else:
10810
0768bf2333a7 [dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents: 10662
diff changeset
    56
            cu.copy_from(buf, table, null=u'NULL', columns=columns)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    57
11140
fabcd1c6dcd1 [dataimport] cleanup a bit nohook store, mostly protecting some attributes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11129
diff changeset
    58
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    59
def _execmany_thread(sql_connect, statements, dump_output_dir=None,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    60
                     support_copy_from=True, encoding='utf-8'):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    61
    """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    62
    Execute sql statement. If 'INSERT INTO', try to use 'COPY FROM' command,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    63
    or fallback to execute_many.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    64
    """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    65
    if support_copy_from:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    66
        execmany_func = _execmany_thread_copy_from
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    67
    else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    68
        execmany_func = _execmany_thread_not_copy_from
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    69
    cnx = sql_connect()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    70
    cu = cnx.cursor()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    71
    try:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    72
        for statement, data in statements:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    73
            table = None
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    74
            columns = None
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    75
            try:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    76
                if not statement.startswith('INSERT INTO'):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    77
                    cu.executemany(statement, data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    78
                    continue
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    79
                table = statement.split()[2]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    80
                if isinstance(data[0], (tuple, list)):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    81
                    columns = None
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    82
                else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    83
                    columns = list(data[0])
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    84
                execmany_func(cu, statement, data, table, columns, encoding)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    85
            except Exception:
10589
7c23b7de2b8d [py3k] print function
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents: 10513
diff changeset
    86
                print('unable to copy data into table %s' % table)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    87
                # Error in import statement, save data in dump_output_dir
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    88
                if dump_output_dir is not None:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    89
                    pdata = {'data': data, 'statement': statement,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    90
                             'time': asctime(), 'columns': columns}
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    91
                    filename = make_uid()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    92
                    try:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    93
                        with open(osp.join(dump_output_dir,
10598
b3f9f929385f [dataimport] Use pickle.dump instead of dumps
Rémi Cardona <remi.cardona@logilab.fr>
parents: 10592
diff changeset
    94
                                           '%s.pickle' % filename), 'wb') as fobj:
10602
4845012cfc8e [py3k] import 'pickle' using six.moves
Rémi Cardona <remi.cardona@logilab.fr>
parents: 10598
diff changeset
    95
                            pickle.dump(pdata, fobj)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    96
                    except IOError:
10589
7c23b7de2b8d [py3k] print function
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents: 10513
diff changeset
    97
                        print('ERROR while pickling in', dump_output_dir, filename+'.pickle')
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    98
                cnx.rollback()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    99
                raise
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   100
    finally:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   101
        cnx.commit()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   102
        cu.close()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   103
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   104
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   105
def _copyfrom_buffer_convert_None(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   106
    '''Convert None value to "NULL"'''
10810
0768bf2333a7 [dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents: 10662
diff changeset
   107
    return u'NULL'
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   108
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   109
def _copyfrom_buffer_convert_number(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   110
    '''Convert a number into its string representation'''
10810
0768bf2333a7 [dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents: 10662
diff changeset
   111
    return text_type(value)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   112
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   113
def _copyfrom_buffer_convert_string(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   114
    '''Convert string value.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   115
    '''
10592
dfa1dcf4d7f1 [py3k] ur'' is gone
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents: 10589
diff changeset
   116
    escape_chars = ((u'\\', u'\\\\'), (u'\t', u'\\t'), (u'\r', u'\\r'),
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   117
                    (u'\n', u'\\n'))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   118
    for char, replace in escape_chars:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   119
        value = value.replace(char, replace)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   120
    return value
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   121
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   122
def _copyfrom_buffer_convert_date(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   123
    '''Convert date into "YYYY-MM-DD"'''
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   124
    # Do not use strftime, as it yields issue with date < 1900
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   125
    # (http://bugs.python.org/issue1777412)
10810
0768bf2333a7 [dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents: 10662
diff changeset
   126
    return u'%04d-%02d-%02d' % (value.year, value.month, value.day)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   127
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   128
def _copyfrom_buffer_convert_datetime(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   129
    '''Convert date into "YYYY-MM-DD HH:MM:SS.UUUUUU"'''
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   130
    # Do not use strftime, as it yields issue with date < 1900
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   131
    # (http://bugs.python.org/issue1777412)
10810
0768bf2333a7 [dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents: 10662
diff changeset
   132
    return u'%s %s' % (_copyfrom_buffer_convert_date(value, **opts),
0768bf2333a7 [dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents: 10662
diff changeset
   133
                       _copyfrom_buffer_convert_time(value, **opts))
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   134
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   135
def _copyfrom_buffer_convert_time(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   136
    '''Convert time into "HH:MM:SS.UUUUUU"'''
10810
0768bf2333a7 [dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents: 10662
diff changeset
   137
    return u'%02d:%02d:%02d.%06d' % (value.hour, value.minute,
0768bf2333a7 [dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents: 10662
diff changeset
   138
                                     value.second, value.microsecond)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   139
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   140
# (types, converter) list.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   141
_COPYFROM_BUFFER_CONVERTERS = [
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   142
    (type(None), _copyfrom_buffer_convert_None),
10613
8d9fe02387e3 [py3k] six.integer_types
Rémi Cardona <remi.cardona@logilab.fr>
parents: 10612
diff changeset
   143
    (integer_types + (float,), _copyfrom_buffer_convert_number),
10612
84468b90e9c1 [py3k] basestring → six.string_types
Rémi Cardona <remi.cardona@logilab.fr>
parents: 10609
diff changeset
   144
    (string_types, _copyfrom_buffer_convert_string),
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   145
    (datetime, _copyfrom_buffer_convert_datetime),
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   146
    (date, _copyfrom_buffer_convert_date),
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   147
    (time, _copyfrom_buffer_convert_time),
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   148
]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   149
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   150
def _create_copyfrom_buffer(data, columns=None, **convert_opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   151
    """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   152
    Create a StringIO buffer for 'COPY FROM' command.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   153
    Deals with Unicode, Int, Float, Date... (see ``converters``)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   154
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   155
    :data: a sequence/dict of tuples
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   156
    :columns: list of columns to consider (default to all columns)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   157
    :converter_opts: keyword arguements given to converters
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   158
    """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   159
    # Create a list rather than directly create a StringIO
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   160
    # to correctly write lines separated by '\n' in a single step
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   161
    rows = []
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   162
    if columns is None:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   163
        if isinstance(data[0], (tuple, list)):
10609
e2d8e81bfe68 [py3k] import range using six.moves
Rémi Cardona <remi.cardona@logilab.fr>
parents: 10602
diff changeset
   164
            columns = list(range(len(data[0])))
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   165
        elif isinstance(data[0], dict):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   166
            columns = data[0].keys()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   167
        else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   168
            raise ValueError('Could not get columns: you must provide columns.')
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   169
    for row in data:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   170
        # Iterate over the different columns and the different values
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   171
        # and try to convert them to a correct datatype.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   172
        # If an error is raised, do not continue.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   173
        formatted_row = []
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   174
        for col in columns:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   175
            try:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   176
                value = row[col]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   177
            except KeyError:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   178
                warnings.warn(u"Column %s is not accessible in row %s"
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   179
                              % (col, row), RuntimeWarning)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   180
                # XXX 'value' set to None so that the import does not end in
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   181
                # error.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   182
                # Instead, the extra keys are set to NULL from the
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   183
                # database point of view.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   184
                value = None
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   185
            for types, converter in _COPYFROM_BUFFER_CONVERTERS:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   186
                if isinstance(value, types):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   187
                    value = converter(value, **convert_opts)
10810
0768bf2333a7 [dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents: 10662
diff changeset
   188
                    assert isinstance(value, text_type)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   189
                    break
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   190
            else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   191
                raise ValueError("Unsupported value type %s" % type(value))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   192
            # We push the value to the new formatted row
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   193
            # if the value is not None and could be converted to a string.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   194
            formatted_row.append(value)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   195
        rows.append('\t'.join(formatted_row))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   196
    return StringIO('\n'.join(rows))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   197
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   198
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   199
class SQLGenObjectStore(NoHookRQLObjectStore):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   200
    """Controller of the data import process. This version is based
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   201
    on direct insertions throught SQL command (COPY FROM or execute many).
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   202
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   203
    >>> store = SQLGenObjectStore(cnx)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   204
    >>> store.create_entity('Person', ...)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   205
    >>> store.flush()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   206
    """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   207
10985
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   208
    def __init__(self, cnx, dump_output_dir=None, nb_threads_statement=1):
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   209
        """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   210
        Initialize a SQLGenObjectStore.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   211
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   212
        Parameters:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   213
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   214
          - cnx: connection on the cubicweb instance
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   215
          - dump_output_dir: a directory to dump failed statements
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   216
            for easier recovery. Default is None (no dump).
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   217
        """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   218
        super(SQLGenObjectStore, self).__init__(cnx)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   219
        ### hijack default source
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   220
        self.source = SQLGenSourceWrapper(
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   221
            self.source, cnx.vreg.schema,
10985
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   222
            dump_output_dir=dump_output_dir)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   223
        ### XXX This is done in super().__init__(), but should be
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   224
        ### redone here to link to the correct source
11140
fabcd1c6dcd1 [dataimport] cleanup a bit nohook store, mostly protecting some attributes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11129
diff changeset
   225
        self._add_relation = self.source.add_relation
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   226
        self.indexes_etypes = {}
10985
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   227
        if nb_threads_statement != 1:
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   228
            warn('[3.21] SQLGenObjectStore is no longer threaded', DeprecationWarning)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   229
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   230
    def flush(self):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   231
        """Flush data to the database"""
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   232
        self.source.flush()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   233
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   234
    def relate(self, subj_eid, rtype, obj_eid, **kwargs):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   235
        if subj_eid is None or obj_eid is None:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   236
            return
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   237
        # XXX Could subjtype be inferred ?
11140
fabcd1c6dcd1 [dataimport] cleanup a bit nohook store, mostly protecting some attributes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11129
diff changeset
   238
        self._add_relation(self._cnx, subj_eid, rtype, obj_eid,
fabcd1c6dcd1 [dataimport] cleanup a bit nohook store, mostly protecting some attributes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11129
diff changeset
   239
                           self.rschema(rtype).inlined, **kwargs)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   240
        if self.rschema(rtype).symmetric:
11140
fabcd1c6dcd1 [dataimport] cleanup a bit nohook store, mostly protecting some attributes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11129
diff changeset
   241
            self._add_relation(self._cnx, obj_eid, rtype, subj_eid,
fabcd1c6dcd1 [dataimport] cleanup a bit nohook store, mostly protecting some attributes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11129
diff changeset
   242
                               self.rschema(rtype).inlined, **kwargs)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   243
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   244
    def drop_indexes(self, etype):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   245
        """Drop indexes for a given entity type"""
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   246
        if etype not in self.indexes_etypes:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   247
            cu = self._cnx.cnxset.cu
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   248
            def index_to_attr(index):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   249
                """turn an index name to (database) attribute name"""
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   250
                return index.replace(etype.lower(), '').replace('idx', '').strip('_')
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   251
            indices = [(index, index_to_attr(index))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   252
                       for index in self.source.dbhelper.list_indices(cu, etype)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   253
                       # Do not consider 'cw_etype_pkey' index
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   254
                       if not index.endswith('key')]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   255
            self.indexes_etypes[etype] = indices
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   256
        for index, attr in self.indexes_etypes[etype]:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   257
            self._cnx.system_sql('DROP INDEX %s' % index)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   258
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   259
    def create_indexes(self, etype):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   260
        """Recreate indexes for a given entity type"""
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   261
        for index, attr in self.indexes_etypes.get(etype, []):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   262
            sql = 'CREATE INDEX %s ON cw_%s(%s)' % (index, etype, attr)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   263
            self._cnx.system_sql(sql)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   264
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   265
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   266
###########################################################################
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   267
## SQL Source #############################################################
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   268
###########################################################################
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   269
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   270
class SQLGenSourceWrapper(object):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   271
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   272
    def __init__(self, system_source, schema,
10985
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   273
                 dump_output_dir=None):
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   274
        self.system_source = system_source
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   275
        # Explicitely backport attributes from system source
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   276
        self._storage_handler = self.system_source._storage_handler
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   277
        self.preprocess_entity = self.system_source.preprocess_entity
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   278
        self.sqlgen = self.system_source.sqlgen
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   279
        self.uri = self.system_source.uri
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   280
        self.eid = self.system_source.eid
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   281
        # Directory to write temporary files
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   282
        self.dump_output_dir = dump_output_dir
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   283
        # Allow to execute code with SQLite backend that does
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   284
        # not support (yet...) copy_from
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   285
        # XXX Should be dealt with in logilab.database
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   286
        spcfrom = system_source.dbhelper.dbapi_module.support_copy_from
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   287
        self.support_copy_from = spcfrom
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   288
        self.dbencoding = system_source.dbhelper.dbencoding
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   289
        self.init_statement_lists()
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   290
        self._inlined_rtypes_cache = {}
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   291
        self._fill_inlined_rtypes_cache(schema)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   292
        self.schema = schema
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   293
        self.do_fti = False
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   294
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   295
    def _fill_inlined_rtypes_cache(self, schema):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   296
        cache = self._inlined_rtypes_cache
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   297
        for eschema in schema.entities():
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   298
            for rschema in eschema.ordered_relations():
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   299
                if rschema.inlined:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   300
                    cache[eschema.type] = SQL_PREFIX + rschema.type
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   301
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   302
    def init_statement_lists(self):
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   303
        self._sql_entities = defaultdict(list)
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   304
        self._sql_relations = {}
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   305
        self._sql_inlined_relations = {}
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   306
        self._sql_eids = defaultdict(list)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   307
        # keep track, for each eid of the corresponding data dict
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   308
        self._sql_eid_insertdicts = {}
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   309
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   310
    def flush(self):
10589
7c23b7de2b8d [py3k] print function
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents: 10513
diff changeset
   311
        print('starting flush')
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   312
        _entities_sql = self._sql_entities
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   313
        _relations_sql = self._sql_relations
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   314
        _inlined_relations_sql = self._sql_inlined_relations
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   315
        _insertdicts = self._sql_eid_insertdicts
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   316
        try:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   317
            # try, for each inlined_relation, to find if we're also creating
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   318
            # the host entity (i.e. the subject of the relation).
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   319
            # In that case, simply update the insert dict and remove
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   320
            # the need to make the
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   321
            # UPDATE statement
10662
10942ed172de [py3k] dict.iteritems → dict.items
Rémi Cardona <remi.cardona@logilab.fr>
parents: 10613
diff changeset
   322
            for statement, datalist in _inlined_relations_sql.items():
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   323
                new_datalist = []
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   324
                # for a given inlined relation,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   325
                # browse each couple to be inserted
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   326
                for data in datalist:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   327
                    keys = list(data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   328
                    # For inlined relations, it exists only two case:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   329
                    # (rtype, cw_eid) or (cw_eid, rtype)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   330
                    if keys[0] == 'cw_eid':
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   331
                        rtype = keys[1]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   332
                    else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   333
                        rtype = keys[0]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   334
                    updated_eid = data['cw_eid']
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   335
                    if updated_eid in _insertdicts:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   336
                        _insertdicts[updated_eid][rtype] = data[rtype]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   337
                    else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   338
                        # could not find corresponding insert dict, keep the
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   339
                        # UPDATE query
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   340
                        new_datalist.append(data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   341
                _inlined_relations_sql[statement] = new_datalist
10985
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   342
            _execmany_thread(self.system_source.get_connection,
11009
d5962fb5bb8e [dataimport] py3k doesn't like + on dict_items objects
Julien Cristau <julien.cristau@logilab.fr>
parents: 10991
diff changeset
   343
                             list(self._sql_eids.items())
d5962fb5bb8e [dataimport] py3k doesn't like + on dict_items objects
Julien Cristau <julien.cristau@logilab.fr>
parents: 10991
diff changeset
   344
                             + list(_entities_sql.items())
d5962fb5bb8e [dataimport] py3k doesn't like + on dict_items objects
Julien Cristau <julien.cristau@logilab.fr>
parents: 10991
diff changeset
   345
                             + list(_relations_sql.items())
d5962fb5bb8e [dataimport] py3k doesn't like + on dict_items objects
Julien Cristau <julien.cristau@logilab.fr>
parents: 10991
diff changeset
   346
                             + list(_inlined_relations_sql.items()),
10985
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   347
                             dump_output_dir=self.dump_output_dir,
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   348
                             support_copy_from=self.support_copy_from,
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   349
                             encoding=self.dbencoding)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   350
        finally:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   351
            _entities_sql.clear()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   352
            _relations_sql.clear()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   353
            _insertdicts.clear()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   354
            _inlined_relations_sql.clear()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   355
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   356
    def add_relation(self, cnx, subject, rtype, object,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   357
                     inlined=False, **kwargs):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   358
        if inlined:
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   359
            _sql = self._sql_inlined_relations
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   360
            data = {'cw_eid': subject, SQL_PREFIX + rtype: object}
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   361
            subjtype = kwargs.get('subjtype')
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   362
            if subjtype is None:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   363
                # Try to infer it
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   364
                targets = [t.type for t in
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   365
                           self.schema.rschema(rtype).subjects()]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   366
                if len(targets) == 1:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   367
                    subjtype = targets[0]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   368
                else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   369
                    raise ValueError('You should give the subject etype for '
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   370
                                     'inlined relation %s'
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   371
                                     ', as it cannot be inferred: '
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   372
                                     'this type is given as keyword argument '
11140
fabcd1c6dcd1 [dataimport] cleanup a bit nohook store, mostly protecting some attributes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11129
diff changeset
   373
                                     '``subjtype``' % rtype)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   374
            statement = self.sqlgen.update(SQL_PREFIX + subjtype,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   375
                                           data, ['cw_eid'])
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   376
        else:
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   377
            _sql = self._sql_relations
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   378
            data = {'eid_from': subject, 'eid_to': object}
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   379
            statement = self.sqlgen.insert('%s_relation' % rtype, data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   380
        if statement in _sql:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   381
            _sql[statement].append(data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   382
        else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   383
            _sql[statement] = [data]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   384
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   385
    def add_entity(self, cnx, entity):
10983
da8168612e61 [dataimport] Fix method signature
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents: 10982
diff changeset
   386
        with self._storage_handler(cnx, entity, 'added'):
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   387
            attrs = self.preprocess_entity(entity)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   388
            rtypes = self._inlined_rtypes_cache.get(entity.cw_etype, ())
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   389
            if isinstance(rtypes, str):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   390
                rtypes = (rtypes,)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   391
            for rtype in rtypes:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   392
                if rtype not in attrs:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   393
                    attrs[rtype] = None
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   394
            sql = self.sqlgen.insert(SQL_PREFIX + entity.cw_etype, attrs)
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   395
            self._sql_eid_insertdicts[entity.eid] = attrs
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   396
            self._append_to_entities(sql, attrs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   397
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   398
    def _append_to_entities(self, sql, attrs):
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   399
        self._sql_entities[sql].append(attrs)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   400
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   401
    def _handle_insert_entity_sql(self, cnx, sql, attrs):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   402
        # We have to overwrite the source given in parameters
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   403
        # as here, we directly use the system source
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   404
        attrs['asource'] = self.system_source.uri
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   405
        self._sql_eids[sql].append(attrs)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   406
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   407
    def _handle_is_relation_sql(self, cnx, sql, attrs):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   408
        self._append_to_entities(sql, attrs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   409
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   410
    def _handle_is_instance_of_sql(self, cnx, sql, attrs):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   411
        self._append_to_entities(sql, attrs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   412
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   413
    def _handle_source_relation_sql(self, cnx, sql, attrs):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   414
        self._append_to_entities(sql, attrs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   415
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   416
    # add_info is _copypasted_ from the one in NativeSQLSource. We want it
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   417
    # there because it will use the _handlers of the SQLGenSourceWrapper, which
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   418
    # are not like the ones in the native source.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   419
    def add_info(self, cnx, entity, source, extid):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   420
        """add type and source info for an eid into the system table"""
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   421
        # begin by inserting eid/type/source/extid into the entities table
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   422
        if extid is not None:
11010
09be4709c8c0 [dataimport] fix SQLGenObjectStore's add_info for python3
Julien Cristau <julien.cristau@logilab.fr>
parents: 11009
diff changeset
   423
            assert isinstance(extid, binary_type)
09be4709c8c0 [dataimport] fix SQLGenObjectStore's add_info for python3
Julien Cristau <julien.cristau@logilab.fr>
parents: 11009
diff changeset
   424
            extid = b64encode(extid).decode('ascii')
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   425
        attrs = {'type': entity.cw_etype, 'eid': entity.eid, 'extid': extid,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   426
                 'asource': source.uri}
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   427
        self._handle_insert_entity_sql(cnx, self.sqlgen.insert('entities', attrs), attrs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   428
        # insert core relations: is, is_instance_of and cw_source
11091
29aebc1edd29 [repository] drop usage of no more necessary eschema_eid function
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11089
diff changeset
   429
        self._handle_is_relation_sql(cnx, 'INSERT INTO is_relation(eid_from,eid_to) VALUES (%s,%s)',
29aebc1edd29 [repository] drop usage of no more necessary eschema_eid function
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11089
diff changeset
   430
                                     (entity.eid, entity.e_schema.eid))
29aebc1edd29 [repository] drop usage of no more necessary eschema_eid function
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11089
diff changeset
   431
        for eschema in entity.e_schema.ancestors() + [entity.e_schema]:
29aebc1edd29 [repository] drop usage of no more necessary eschema_eid function
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11089
diff changeset
   432
            self._handle_is_relation_sql(cnx,
29aebc1edd29 [repository] drop usage of no more necessary eschema_eid function
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11089
diff changeset
   433
                                         'INSERT INTO is_instance_of_relation(eid_from,eid_to) VALUES (%s,%s)',
29aebc1edd29 [repository] drop usage of no more necessary eschema_eid function
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11089
diff changeset
   434
                                         (entity.eid, eschema.eid))
11089
731d217e4a31 [dataimport] simplify SQLGenSourceWrapper.add_info
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11010
diff changeset
   435
        self._handle_is_relation_sql(cnx, 'INSERT INTO cw_source_relation(eid_from,eid_to) VALUES (%s,%s)',
731d217e4a31 [dataimport] simplify SQLGenSourceWrapper.add_info
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11010
diff changeset
   436
                                     (entity.eid, source.eid))
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   437
        # now we can update the full text index
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   438
        if self.do_fti and self.need_fti_indexation(entity.cw_etype):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   439
            self.index_entity(cnx, entity=entity)