cubicweb/dataimport/pgstore.py
author Philippe Pepiot <philippe.pepiot@logilab.fr>
Tue, 17 Mar 2020 13:34:54 +0100
changeset 12917 db0f56b19583
parent 12567 26744ad37953
permissions -rw-r--r--
[pkg] merge 3.27 Require python >= 3.6 since recent typing notations require >= 3.6
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
11767
432f87a63057 flake8 and all
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11346
diff changeset
     1
# copyright 2003-2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     2
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     3
#
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     4
# This file is part of CubicWeb.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     5
#
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     6
# CubicWeb is free software: you can redistribute it and/or modify it under the
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     7
# terms of the GNU Lesser General Public License as published by the Free
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     8
# Software Foundation, either version 2.1 of the License, or (at your option)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     9
# any later version.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    10
#
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    11
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    12
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    13
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    14
# details.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    15
#
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    16
# You should have received a copy of the GNU Lesser General Public License along
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    17
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    18
"""Postgres specific store"""
11767
432f87a63057 flake8 and all
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11346
diff changeset
    19
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    20
import warnings
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    21
import os.path as osp
10810
0768bf2333a7 [dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents: 10662
diff changeset
    22
from io import StringIO
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    23
from time import asctime
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    24
from datetime import date, datetime, time
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    25
from collections import defaultdict
12567
26744ad37953 Drop python2 support
Denis Laxalde <denis.laxalde@logilab.fr>
parents: 12504
diff changeset
    26
import pickle
10602
4845012cfc8e [py3k] import 'pickle' using six.moves
Rémi Cardona <remi.cardona@logilab.fr>
parents: 10598
diff changeset
    27
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    28
from cubicweb.utils import make_uid
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    29
from cubicweb.server.sqlutils import SQL_PREFIX
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    30
from cubicweb.dataimport.stores import NoHookRQLObjectStore
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    31
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    32
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    33
def _execmany_thread_not_copy_from(cu, statement, data, table=None,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    34
                                   columns=None, encoding='utf-8'):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    35
    """ Execute thread without copy from
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    36
    """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    37
    cu.executemany(statement, data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    38
11140
fabcd1c6dcd1 [dataimport] cleanup a bit nohook store, mostly protecting some attributes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11129
diff changeset
    39
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    40
def _execmany_thread_copy_from(cu, statement, data, table,
                               columns, encoding='utf-8'):
    """Insert `data` into `table` through the cursor's ``copy_from``.

    A text buffer is first built from `data` by `_create_copyfrom_buffer`.
    When that raises ValueError, the rows are inserted with
    `_execmany_thread_not_copy_from` using `statement` instead.
    """
    try:
        copy_buffer = _create_copyfrom_buffer(data, columns, encoding=encoding)
    except ValueError:
        # The buffer could not be built: use plain executemany instead.
        _execmany_thread_not_copy_from(cu, statement, data)
        return
    copy_kwargs = {'null': u'NULL'}
    if columns is not None:
        # Only name the columns when the caller provided them.
        copy_kwargs['columns'] = columns
    cu.copy_from(copy_buffer, table, **copy_kwargs)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    53
11140
fabcd1c6dcd1 [dataimport] cleanup a bit nohook store, mostly protecting some attributes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11129
diff changeset
    54
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    55
def _dump_failed_statement(dump_output_dir, statement, data, columns):
    """Pickle a failed statement and its data into `dump_output_dir`.

    This is a best-effort diagnostic helper called while an import error is
    being handled. Any failure to write the dump is reported on stdout and
    swallowed, so that it can neither mask the original error nor prevent the
    caller from rolling back.
    """
    filename = make_uid()
    pdata = {'data': data, 'statement': statement,
             'time': asctime(), 'columns': columns}
    try:
        with open(osp.join(dump_output_dir,
                           '%s.pickle' % filename), 'wb') as fobj:
            pickle.dump(pdata, fobj)
    except Exception:
        # Not only I/O errors: pickle.dump also fails on unpicklable data.
        print('ERROR while pickling in', dump_output_dir, filename + '.pickle')


def _execmany_thread(sql_connect, statements, dump_output_dir=None,
                     support_copy_from=True, encoding='utf-8'):
    """
    Execute sql statement. If 'INSERT INTO', try to use 'COPY FROM' command,
    or fallback to execute_many.

    :sql_connect: callable returning a database connection; the connection
      is committed but not closed here
    :statements: iterable of (statement, data) pairs
    :dump_output_dir: if not None, directory where the statement and data of
      a failing import are pickled
    :support_copy_from: use 'COPY FROM' for 'INSERT INTO' statements when
      true, plain executemany otherwise
    :encoding: given as is to the function executing 'INSERT INTO' statements

    Any exception raised while executing a statement is re-raised once the
    transaction has been rolled back. 'INSERT INTO' statements with no data
    are skipped.
    """
    if support_copy_from:
        execmany_func = _execmany_thread_copy_from
    else:
        execmany_func = _execmany_thread_not_copy_from
    cnx = sql_connect()
    cu = cnx.cursor()
    try:
        for statement, data in statements:
            table = None
            columns = None
            try:
                if not statement.startswith('INSERT INTO'):
                    cu.executemany(statement, data)
                    continue
                if not data:
                    # Nothing to insert; `data[0]` below would otherwise
                    # raise IndexError and be reported as an import failure.
                    continue
                table = statement.split()[2]
                if not isinstance(data[0], (tuple, list)):
                    columns = list(data[0])
                execmany_func(cu, statement, data, table, columns, encoding)
            except Exception:
                print('unable to copy data into table %s' % table)
                try:
                    # Error in import statement, save data in dump_output_dir
                    if dump_output_dir is not None:
                        _dump_failed_statement(dump_output_dir, statement,
                                               data, columns)
                finally:
                    # Roll back whatever happened while dumping.
                    cnx.rollback()
                raise
    finally:
        try:
            cnx.commit()
        finally:
            # Release the cursor even if the commit itself fails.
            cu.close()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    99
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   100
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   101
def _copyfrom_buffer_convert_None(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   102
    '''Convert None value to "NULL"'''
10810
0768bf2333a7 [dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents: 10662
diff changeset
   103
    return u'NULL'
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   104
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   105
def _copyfrom_buffer_convert_number(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   106
    '''Convert a number into its string representation'''
12567
26744ad37953 Drop python2 support
Denis Laxalde <denis.laxalde@logilab.fr>
parents: 12504
diff changeset
   107
    return str(value)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   108
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   109
def _copyfrom_buffer_convert_string(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   110
    '''Convert string value.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   111
    '''
10592
dfa1dcf4d7f1 [py3k] ur'' is gone
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents: 10589
diff changeset
   112
    escape_chars = ((u'\\', u'\\\\'), (u'\t', u'\\t'), (u'\r', u'\\r'),
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   113
                    (u'\n', u'\\n'))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   114
    for char, replace in escape_chars:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   115
        value = value.replace(char, replace)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   116
    return value
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   117
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   118
def _copyfrom_buffer_convert_date(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   119
    '''Convert date into "YYYY-MM-DD"'''
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   120
    # Do not use strftime, as it yields issue with date < 1900
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   121
    # (http://bugs.python.org/issue1777412)
10810
0768bf2333a7 [dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents: 10662
diff changeset
   122
    return u'%04d-%02d-%02d' % (value.year, value.month, value.day)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   123
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   124
def _copyfrom_buffer_convert_datetime(value, **opts):
    '''Return `value` formatted as "YYYY-MM-DD HH:MM:SS.UUUUUU".

    The date part comes from `_copyfrom_buffer_convert_date` and the time
    part from `_copyfrom_buffer_convert_time`; both receive `opts`.
    '''
    # strftime is deliberately avoided since it yields issues with
    # date < 1900 (http://bugs.python.org/issue1777412)
    date_text = _copyfrom_buffer_convert_date(value, **opts)
    time_text = _copyfrom_buffer_convert_time(value, **opts)
    return u'%s %s' % (date_text, time_text)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   130
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   131
def _copyfrom_buffer_convert_time(value, **opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   132
    '''Convert time into "HH:MM:SS.UUUUUU"'''
10810
0768bf2333a7 [dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents: 10662
diff changeset
   133
    return u'%02d:%02d:%02d.%06d' % (value.hour, value.minute,
0768bf2333a7 [dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents: 10662
diff changeset
   134
                                     value.second, value.microsecond)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   135
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   136
# (types, converter) list.
# `_create_copyfrom_buffer` tries the entries in order and uses the first one
# whose types match the value with ``isinstance``, so the order matters:
# `datetime` is a subclass of `date` and must be listed before it.
_COPYFROM_BUFFER_CONVERTERS = [
    (type(None), _copyfrom_buffer_convert_None),
    # `bool` values are handled here too, `bool` being a subclass of `int`
    ((int, float), _copyfrom_buffer_convert_number),
    (str, _copyfrom_buffer_convert_string),
    (datetime, _copyfrom_buffer_convert_datetime),
    (date, _copyfrom_buffer_convert_date),
    (time, _copyfrom_buffer_convert_time),
]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   145
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   146
def _create_copyfrom_buffer(data, columns=None, **convert_opts):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   147
    """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   148
    Create a StringIO buffer for 'COPY FROM' command.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   149
    Deals with Unicode, Int, Float, Date... (see ``converters``)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   150
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   151
    :data: a sequence/dict of tuples
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   152
    :columns: list of columns to consider (default to all columns)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   153
    :converter_opts: keyword arguements given to converters
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   154
    """
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   155
    # Create a list rather than directly create a StringIO
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   156
    # to correctly write lines separated by '\n' in a single step
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   157
    rows = []
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   158
    if columns is None:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   159
        if isinstance(data[0], (tuple, list)):
10609
e2d8e81bfe68 [py3k] import range using six.moves
Rémi Cardona <remi.cardona@logilab.fr>
parents: 10602
diff changeset
   160
            columns = list(range(len(data[0])))
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   161
        elif isinstance(data[0], dict):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   162
            columns = data[0].keys()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   163
        else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   164
            raise ValueError('Could not get columns: you must provide columns.')
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   165
    for row in data:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   166
        # Iterate over the different columns and the different values
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   167
        # and try to convert them to a correct datatype.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   168
        # If an error is raised, do not continue.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   169
        formatted_row = []
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   170
        for col in columns:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   171
            try:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   172
                value = row[col]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   173
            except KeyError:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   174
                warnings.warn(u"Column %s is not accessible in row %s"
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   175
                              % (col, row), RuntimeWarning)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   176
                # XXX 'value' set to None so that the import does not end in
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   177
                # error.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   178
                # Instead, the extra keys are set to NULL from the
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   179
                # database point of view.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   180
                value = None
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   181
            for types, converter in _COPYFROM_BUFFER_CONVERTERS:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   182
                if isinstance(value, types):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   183
                    value = converter(value, **convert_opts)
12567
26744ad37953 Drop python2 support
Denis Laxalde <denis.laxalde@logilab.fr>
parents: 12504
diff changeset
   184
                    assert isinstance(value, str)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   185
                    break
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   186
            else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   187
                raise ValueError("Unsupported value type %s" % type(value))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   188
            # We push the value to the new formatted row
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   189
            # if the value is not None and could be converted to a string.
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   190
            formatted_row.append(value)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   191
        rows.append('\t'.join(formatted_row))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   192
    return StringIO('\n'.join(rows))
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   193
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   194
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   195
###########################################################################
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   196
## SQL Source #############################################################
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   197
###########################################################################
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   198
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   199
class SQLGenSourceWrapper(object):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   200
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   201
    def __init__(self, system_source, schema,
10985
50ed87bc4cc6 [dataimport] remove threading support from SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10983
diff changeset
   202
                 dump_output_dir=None):
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   203
        self.system_source = system_source
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   204
        # Explicitely backport attributes from system source
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   205
        self._storage_handler = self.system_source._storage_handler
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   206
        self.preprocess_entity = self.system_source.preprocess_entity
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   207
        self.sqlgen = self.system_source.sqlgen
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   208
        self.uri = self.system_source.uri
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   209
        self.eid = self.system_source.eid
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   210
        # Directory to write temporary files
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   211
        self.dump_output_dir = dump_output_dir
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   212
        # Allow to execute code with SQLite backend that does
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   213
        # not support (yet...) copy_from
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   214
        # XXX Should be dealt with in logilab.database
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   215
        spcfrom = system_source.dbhelper.dbapi_module.support_copy_from
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   216
        self.support_copy_from = spcfrom
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   217
        self.dbencoding = system_source.dbhelper.dbencoding
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   218
        self.init_statement_lists()
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   219
        self._inlined_rtypes_cache = {}
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   220
        self._fill_inlined_rtypes_cache(schema)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   221
        self.schema = schema
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   222
        self.do_fti = False
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   223
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   224
    def _fill_inlined_rtypes_cache(self, schema):
        """Record, per entity type, the SQL columns of its inlined relations.

        ``self._inlined_rtypes_cache`` maps an entity type name to a tuple of
        prefixed column names, one per inlined relation returned by
        ``eschema.ordered_relations()``. Entity types without any inlined
        relation get no entry.

        Every inlined relation is kept: storing a single value per entity
        type would leave only the last relation for types that have several.

        :param schema: schema object exposing ``entities()``.
        """
        cache = self._inlined_rtypes_cache
        for eschema in schema.entities():
            columns = tuple(SQL_PREFIX + rschema.type
                            for rschema in eschema.ordered_relations()
                            if rschema.inlined)
            if columns:
                cache[eschema.type] = columns
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   230
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   231
    def init_statement_lists(self):
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   232
        self._sql_entities = defaultdict(list)
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   233
        self._sql_relations = {}
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   234
        self._sql_inlined_relations = {}
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   235
        self._sql_eids = defaultdict(list)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   236
        # keep track, for each eid of the corresponding data dict
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   237
        self._sql_eid_insertdicts = {}
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   238
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   239
    def flush(self):
        """Execute every buffered statement, then empty the buffers.

        Before executing, each pending inlined-relation UPDATE whose subject
        entity is itself pending insertion is folded into that entity's
        INSERT parameters, so no separate UPDATE is issued for it.

        All buffers, including the ``entities`` table rows held in
        ``_sql_eids``, are emptied even when execution raises, so a later
        flush never resends rows from an earlier one.
        """
        print('starting flush')
        eids_sql = self._sql_eids
        entities_sql = self._sql_entities
        relations_sql = self._sql_relations
        inlined_relations_sql = self._sql_inlined_relations
        insertdicts = self._sql_eid_insertdicts
        try:
            # For each inlined relation, check whether the host entity (the
            # subject of the relation) is created in this same batch. If so,
            # update its insert dict and drop the UPDATE statement.
            for statement, datalist in inlined_relations_sql.items():
                remaining = []
                for data in datalist:
                    # An inlined relation dict has exactly two keys, in
                    # either order: (rtype, cw_eid) or (cw_eid, rtype).
                    keys = list(data)
                    rtype = keys[1] if keys[0] == 'cw_eid' else keys[0]
                    updated_eid = data['cw_eid']
                    if updated_eid in insertdicts:
                        insertdicts[updated_eid][rtype] = data[rtype]
                    else:
                        # No matching insert dict: keep the UPDATE query.
                        remaining.append(data)
                # Rebinding an existing key is safe while iterating items().
                inlined_relations_sql[statement] = remaining
            _execmany_thread(self.system_source.get_connection,
                             list(eids_sql.items())
                             + list(entities_sql.items())
                             + list(relations_sql.items())
                             + list(inlined_relations_sql.items()),
                             dump_output_dir=self.dump_output_dir,
                             support_copy_from=self.support_copy_from,
                             encoding=self.dbencoding)
        finally:
            # Without clearing the eids buffer too, the same ``entities``
            # rows would be sent again by the next flush.
            eids_sql.clear()
            entities_sql.clear()
            relations_sql.clear()
            insertdicts.clear()
            inlined_relations_sql.clear()
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   284
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   285
    def add_relation(self, cnx, subject, rtype, object,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   286
                     inlined=False, **kwargs):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   287
        if inlined:
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   288
            _sql = self._sql_inlined_relations
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   289
            data = {'cw_eid': subject, SQL_PREFIX + rtype: object}
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   290
            subjtype = kwargs.get('subjtype')
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   291
            if subjtype is None:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   292
                # Try to infer it
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   293
                targets = [t.type for t in
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   294
                           self.schema.rschema(rtype).subjects()]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   295
                if len(targets) == 1:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   296
                    subjtype = targets[0]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   297
                else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   298
                    raise ValueError('You should give the subject etype for '
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   299
                                     'inlined relation %s'
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   300
                                     ', as it cannot be inferred: '
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   301
                                     'this type is given as keyword argument '
11140
fabcd1c6dcd1 [dataimport] cleanup a bit nohook store, mostly protecting some attributes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11129
diff changeset
   302
                                     '``subjtype``' % rtype)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   303
            statement = self.sqlgen.update(SQL_PREFIX + subjtype,
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   304
                                           data, ['cw_eid'])
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   305
        else:
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   306
            _sql = self._sql_relations
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   307
            data = {'eid_from': subject, 'eid_to': object}
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   308
            statement = self.sqlgen.insert('%s_relation' % rtype, data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   309
        if statement in _sql:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   310
            _sql[statement].append(data)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   311
        else:
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   312
            _sql[statement] = [data]
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   313
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   314
    def add_entity(self, cnx, entity):
        """Buffer the INSERT statement creating ``entity``.

        Columns of the entity type's inlined relations that are absent from
        the preprocessed attributes are added with a ``None`` value. The
        attribute dict is also remembered by eid so that ``flush`` can fold
        inlined relations into it.

        :param cnx: connection handed to the storage handler.
        :param entity: the entity to insert.
        """
        with self._storage_handler(cnx, entity, 'added'):
            attrs = self.preprocess_entity(entity)
            inlined_columns = self._inlined_rtypes_cache.get(entity.cw_etype, ())
            # The cache may hold a single column name: normalise to a tuple.
            if isinstance(inlined_columns, str):
                inlined_columns = (inlined_columns,)
            for column in inlined_columns:
                attrs.setdefault(column, None)
            statement = self.sqlgen.insert(SQL_PREFIX + entity.cw_etype, attrs)
            self._sql_eid_insertdicts[entity.eid] = attrs
            self._append_to_entities(statement, attrs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   326
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   327
    def _append_to_entities(self, sql, attrs):
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   328
        self._sql_entities[sql].append(attrs)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   329
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   330
    def _handle_insert_entity_sql(self, cnx, sql, attrs):
10986
ca8321b32392 [dataimport] separate entities table from other metadata in SQLGenObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10985
diff changeset
   331
        self._sql_eids[sql].append(attrs)
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   332
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   333
    def _handle_is_relation_sql(self, cnx, sql, attrs):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   334
        self._append_to_entities(sql, attrs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   335
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   336
    def _handle_is_instance_of_sql(self, cnx, sql, attrs):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   337
        self._append_to_entities(sql, attrs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   338
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   339
    def _handle_source_relation_sql(self, cnx, sql, attrs):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   340
        self._append_to_entities(sql, attrs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   341
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   342
    # add_info is _copypasted_ from the one in NativeSQLSource. We want it
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   343
    # there because it will use the _handlers of the SQLGenSourceWrapper, which
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   344
    # are not like the ones in the native source.
11774
51c160677afe [repository] Drop the entities.extid column and associated cache
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11773
diff changeset
   345
    def add_info(self, cnx, entity, source):
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   346
        """add type and source info for an eid into the system table"""
11774
51c160677afe [repository] Drop the entities.extid column and associated cache
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11773
diff changeset
   347
        # begin by inserting eid/type/source into the entities table
51c160677afe [repository] Drop the entities.extid column and associated cache
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11773
diff changeset
   348
        attrs = {'type': entity.cw_etype, 'eid': entity.eid}
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   349
        self._handle_insert_entity_sql(cnx, self.sqlgen.insert('entities', attrs), attrs)
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   350
        # insert core relations: is, is_instance_of and cw_source
11091
29aebc1edd29 [repository] drop usage of no more necessary eschema_eid function
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11089
diff changeset
   351
        self._handle_is_relation_sql(cnx, 'INSERT INTO is_relation(eid_from,eid_to) VALUES (%s,%s)',
29aebc1edd29 [repository] drop usage of no more necessary eschema_eid function
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11089
diff changeset
   352
                                     (entity.eid, entity.e_schema.eid))
29aebc1edd29 [repository] drop usage of no more necessary eschema_eid function
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11089
diff changeset
   353
        for eschema in entity.e_schema.ancestors() + [entity.e_schema]:
29aebc1edd29 [repository] drop usage of no more necessary eschema_eid function
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11089
diff changeset
   354
            self._handle_is_relation_sql(cnx,
29aebc1edd29 [repository] drop usage of no more necessary eschema_eid function
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11089
diff changeset
   355
                                         'INSERT INTO is_instance_of_relation(eid_from,eid_to) VALUES (%s,%s)',
29aebc1edd29 [repository] drop usage of no more necessary eschema_eid function
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11089
diff changeset
   356
                                         (entity.eid, eschema.eid))
11089
731d217e4a31 [dataimport] simplify SQLGenSourceWrapper.add_info
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11010
diff changeset
   357
        self._handle_is_relation_sql(cnx, 'INSERT INTO cw_source_relation(eid_from,eid_to) VALUES (%s,%s)',
731d217e4a31 [dataimport] simplify SQLGenSourceWrapper.add_info
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11010
diff changeset
   358
                                     (entity.eid, source.eid))
10513
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   359
        # now we can update the full text index
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   360
        if self.do_fti and self.need_fti_indexation(entity.cw_etype):
7bec01a59f92 [dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   361
            self.index_entity(cnx, entity=entity)