author | Julien Cristau <julien.cristau@logilab.fr> |
Mon, 09 Nov 2015 16:21:29 +0100 | |
changeset 10879 | 3193d9ede8dd |
parent 10810 | 0768bf2333a7 |
child 10941 | e3ec46b5c710 |
permissions | -rw-r--r-- |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
1 |
# copyright 2003-2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
2 |
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
3 |
# |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
4 |
# This file is part of CubicWeb. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
5 |
# |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
6 |
# CubicWeb is free software: you can redistribute it and/or modify it under the |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
7 |
# terms of the GNU Lesser General Public License as published by the Free |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
8 |
# Software Foundation, either version 2.1 of the License, or (at your option) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
9 |
# any later version. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
10 |
# |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
11 |
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
12 |
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
13 |
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
14 |
# details. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
15 |
# |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
16 |
# You should have received a copy of the GNU Lesser General Public License along |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
17 |
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
18 |
"""Postgres specific store""" |
10589
7c23b7de2b8d
[py3k] print function
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
10513
diff
changeset
|
19 |
from __future__ import print_function |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
20 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
21 |
import threading |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
22 |
import warnings |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
23 |
import os.path as osp |
10810
0768bf2333a7
[dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents:
10662
diff
changeset
|
24 |
from io import StringIO |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
25 |
from time import asctime |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
26 |
from datetime import date, datetime, time |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
27 |
from collections import defaultdict |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
28 |
from base64 import b64encode |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
29 |
|
10810
0768bf2333a7
[dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents:
10662
diff
changeset
|
30 |
from six import string_types, integer_types, text_type |
10609
e2d8e81bfe68
[py3k] import range using six.moves
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10602
diff
changeset
|
31 |
from six.moves import cPickle as pickle, range |
10602
4845012cfc8e
[py3k] import 'pickle' using six.moves
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10598
diff
changeset
|
32 |
|
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
33 |
from cubicweb.utils import make_uid |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
34 |
from cubicweb.server.sqlutils import SQL_PREFIX |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
35 |
from cubicweb.dataimport.stores import NoHookRQLObjectStore |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
36 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
37 |
def _import_statements(sql_connect, statements, nb_threads=3, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
38 |
dump_output_dir=None, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
39 |
support_copy_from=True, encoding='utf-8'): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
40 |
""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
41 |
Import a bunch of sql statements, using different threads. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
42 |
""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
43 |
try: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
44 |
chunksize = (len(statements) / nb_threads) + 1 |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
45 |
threads = [] |
10609
e2d8e81bfe68
[py3k] import range using six.moves
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10602
diff
changeset
|
46 |
for i in range(nb_threads): |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
47 |
chunks = statements[i*chunksize:(i+1)*chunksize] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
48 |
thread = threading.Thread(target=_execmany_thread, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
49 |
args=(sql_connect, chunks, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
50 |
dump_output_dir, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
51 |
support_copy_from, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
52 |
encoding)) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
53 |
thread.start() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
54 |
threads.append(thread) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
55 |
for t in threads: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
56 |
t.join() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
57 |
except Exception: |
10589
7c23b7de2b8d
[py3k] print function
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
10513
diff
changeset
|
58 |
print('Error in import statements') |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
59 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
60 |
def _execmany_thread_not_copy_from(cu, statement, data, table=None, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
61 |
columns=None, encoding='utf-8'): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
62 |
""" Execute thread without copy from |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
63 |
""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
64 |
cu.executemany(statement, data) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
65 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
66 |
def _execmany_thread_copy_from(cu, statement, data, table, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
67 |
columns, encoding='utf-8'): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
68 |
""" Execute thread with copy from |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
69 |
""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
70 |
buf = _create_copyfrom_buffer(data, columns, encoding=encoding) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
71 |
if buf is None: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
72 |
_execmany_thread_not_copy_from(cu, statement, data) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
73 |
else: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
74 |
if columns is None: |
10810
0768bf2333a7
[dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents:
10662
diff
changeset
|
75 |
cu.copy_from(buf, table, null=u'NULL') |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
76 |
else: |
10810
0768bf2333a7
[dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents:
10662
diff
changeset
|
77 |
cu.copy_from(buf, table, null=u'NULL', columns=columns) |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
78 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
79 |
def _execmany_thread(sql_connect, statements, dump_output_dir=None, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
80 |
support_copy_from=True, encoding='utf-8'): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
81 |
""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
82 |
Execute sql statement. If 'INSERT INTO', try to use 'COPY FROM' command, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
83 |
or fallback to execute_many. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
84 |
""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
85 |
if support_copy_from: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
86 |
execmany_func = _execmany_thread_copy_from |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
87 |
else: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
88 |
execmany_func = _execmany_thread_not_copy_from |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
89 |
cnx = sql_connect() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
90 |
cu = cnx.cursor() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
91 |
try: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
92 |
for statement, data in statements: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
93 |
table = None |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
94 |
columns = None |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
95 |
try: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
96 |
if not statement.startswith('INSERT INTO'): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
97 |
cu.executemany(statement, data) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
98 |
continue |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
99 |
table = statement.split()[2] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
100 |
if isinstance(data[0], (tuple, list)): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
101 |
columns = None |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
102 |
else: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
103 |
columns = list(data[0]) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
104 |
execmany_func(cu, statement, data, table, columns, encoding) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
105 |
except Exception: |
10589
7c23b7de2b8d
[py3k] print function
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
10513
diff
changeset
|
106 |
print('unable to copy data into table %s' % table) |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
107 |
# Error in import statement, save data in dump_output_dir |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
108 |
if dump_output_dir is not None: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
109 |
pdata = {'data': data, 'statement': statement, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
110 |
'time': asctime(), 'columns': columns} |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
111 |
filename = make_uid() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
112 |
try: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
113 |
with open(osp.join(dump_output_dir, |
10598
b3f9f929385f
[dataimport] Use pickle.dump instead of dumps
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10592
diff
changeset
|
114 |
'%s.pickle' % filename), 'wb') as fobj: |
10602
4845012cfc8e
[py3k] import 'pickle' using six.moves
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10598
diff
changeset
|
115 |
pickle.dump(pdata, fobj) |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
116 |
except IOError: |
10589
7c23b7de2b8d
[py3k] print function
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
10513
diff
changeset
|
117 |
print('ERROR while pickling in', dump_output_dir, filename+'.pickle') |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
118 |
cnx.rollback() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
119 |
raise |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
120 |
finally: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
121 |
cnx.commit() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
122 |
cu.close() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
123 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
124 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
125 |
def _copyfrom_buffer_convert_None(value, **opts): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
126 |
'''Convert None value to "NULL"''' |
10810
0768bf2333a7
[dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents:
10662
diff
changeset
|
127 |
return u'NULL' |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
128 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
129 |
def _copyfrom_buffer_convert_number(value, **opts): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
130 |
'''Convert a number into its string representation''' |
10810
0768bf2333a7
[dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents:
10662
diff
changeset
|
131 |
return text_type(value) |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
132 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
133 |
def _copyfrom_buffer_convert_string(value, **opts): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
134 |
'''Convert string value. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
135 |
''' |
10592
dfa1dcf4d7f1
[py3k] ur'' is gone
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
10589
diff
changeset
|
136 |
escape_chars = ((u'\\', u'\\\\'), (u'\t', u'\\t'), (u'\r', u'\\r'), |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
137 |
(u'\n', u'\\n')) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
138 |
for char, replace in escape_chars: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
139 |
value = value.replace(char, replace) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
140 |
return value |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
141 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
142 |
def _copyfrom_buffer_convert_date(value, **opts): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
143 |
'''Convert date into "YYYY-MM-DD"''' |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
144 |
# Do not use strftime, as it yields issue with date < 1900 |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
145 |
# (http://bugs.python.org/issue1777412) |
10810
0768bf2333a7
[dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents:
10662
diff
changeset
|
146 |
return u'%04d-%02d-%02d' % (value.year, value.month, value.day) |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
147 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
148 |
def _copyfrom_buffer_convert_datetime(value, **opts): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
149 |
'''Convert date into "YYYY-MM-DD HH:MM:SS.UUUUUU"''' |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
150 |
# Do not use strftime, as it yields issue with date < 1900 |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
151 |
# (http://bugs.python.org/issue1777412) |
10810
0768bf2333a7
[dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents:
10662
diff
changeset
|
152 |
return u'%s %s' % (_copyfrom_buffer_convert_date(value, **opts), |
0768bf2333a7
[dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents:
10662
diff
changeset
|
153 |
_copyfrom_buffer_convert_time(value, **opts)) |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
154 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
155 |
def _copyfrom_buffer_convert_time(value, **opts): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
156 |
'''Convert time into "HH:MM:SS.UUUUUU"''' |
10810
0768bf2333a7
[dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents:
10662
diff
changeset
|
157 |
return u'%02d:%02d:%02d.%06d' % (value.hour, value.minute, |
0768bf2333a7
[dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents:
10662
diff
changeset
|
158 |
value.second, value.microsecond) |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
159 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
160 |
# (types, converter) list. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
161 |
_COPYFROM_BUFFER_CONVERTERS = [ |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
162 |
(type(None), _copyfrom_buffer_convert_None), |
10613
8d9fe02387e3
[py3k] six.integer_types
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10612
diff
changeset
|
163 |
(integer_types + (float,), _copyfrom_buffer_convert_number), |
10612
84468b90e9c1
[py3k] basestring → six.string_types
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10609
diff
changeset
|
164 |
(string_types, _copyfrom_buffer_convert_string), |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
165 |
(datetime, _copyfrom_buffer_convert_datetime), |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
166 |
(date, _copyfrom_buffer_convert_date), |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
167 |
(time, _copyfrom_buffer_convert_time), |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
168 |
] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
169 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
170 |
def _create_copyfrom_buffer(data, columns=None, **convert_opts): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
171 |
""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
172 |
Create a StringIO buffer for 'COPY FROM' command. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
173 |
Deals with Unicode, Int, Float, Date... (see ``converters``) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
174 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
175 |
:data: a sequence/dict of tuples |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
176 |
:columns: list of columns to consider (default to all columns) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
177 |
:converter_opts: keyword arguements given to converters |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
178 |
""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
179 |
# Create a list rather than directly create a StringIO |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
180 |
# to correctly write lines separated by '\n' in a single step |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
181 |
rows = [] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
182 |
if columns is None: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
183 |
if isinstance(data[0], (tuple, list)): |
10609
e2d8e81bfe68
[py3k] import range using six.moves
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10602
diff
changeset
|
184 |
columns = list(range(len(data[0]))) |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
185 |
elif isinstance(data[0], dict): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
186 |
columns = data[0].keys() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
187 |
else: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
188 |
raise ValueError('Could not get columns: you must provide columns.') |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
189 |
for row in data: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
190 |
# Iterate over the different columns and the different values |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
191 |
# and try to convert them to a correct datatype. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
192 |
# If an error is raised, do not continue. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
193 |
formatted_row = [] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
194 |
for col in columns: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
195 |
try: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
196 |
value = row[col] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
197 |
except KeyError: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
198 |
warnings.warn(u"Column %s is not accessible in row %s" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
199 |
% (col, row), RuntimeWarning) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
200 |
# XXX 'value' set to None so that the import does not end in |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
201 |
# error. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
202 |
# Instead, the extra keys are set to NULL from the |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
203 |
# database point of view. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
204 |
value = None |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
205 |
for types, converter in _COPYFROM_BUFFER_CONVERTERS: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
206 |
if isinstance(value, types): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
207 |
value = converter(value, **convert_opts) |
10810
0768bf2333a7
[dataimport] give unicode objects to psycopg2 copy_from
Julien Cristau <julien.cristau@logilab.fr>
parents:
10662
diff
changeset
|
208 |
assert isinstance(value, text_type) |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
209 |
break |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
210 |
else: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
211 |
raise ValueError("Unsupported value type %s" % type(value)) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
212 |
# We push the value to the new formatted row |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
213 |
# if the value is not None and could be converted to a string. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
214 |
formatted_row.append(value) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
215 |
rows.append('\t'.join(formatted_row)) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
216 |
return StringIO('\n'.join(rows)) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
217 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
218 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
219 |
class SQLGenObjectStore(NoHookRQLObjectStore): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
220 |
"""Controller of the data import process. This version is based |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
221 |
on direct insertions throught SQL command (COPY FROM or execute many). |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
222 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
223 |
>>> store = SQLGenObjectStore(cnx) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
224 |
>>> store.create_entity('Person', ...) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
225 |
>>> store.flush() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
226 |
""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
227 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
228 |
def __init__(self, cnx, dump_output_dir=None, nb_threads_statement=3): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
229 |
""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
230 |
Initialize a SQLGenObjectStore. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
231 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
232 |
Parameters: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
233 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
234 |
- cnx: connection on the cubicweb instance |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
235 |
- dump_output_dir: a directory to dump failed statements |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
236 |
for easier recovery. Default is None (no dump). |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
237 |
- nb_threads_statement: number of threads used |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
238 |
for SQL insertion (default is 3). |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
239 |
""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
240 |
super(SQLGenObjectStore, self).__init__(cnx) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
241 |
### hijack default source |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
242 |
self.source = SQLGenSourceWrapper( |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
243 |
self.source, cnx.vreg.schema, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
244 |
dump_output_dir=dump_output_dir, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
245 |
nb_threads_statement=nb_threads_statement) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
246 |
### XXX This is done in super().__init__(), but should be |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
247 |
### redone here to link to the correct source |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
248 |
self.add_relation = self.source.add_relation |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
249 |
self.indexes_etypes = {} |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
250 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
251 |
def flush(self): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
252 |
"""Flush data to the database""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
253 |
self.source.flush() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
254 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
255 |
def relate(self, subj_eid, rtype, obj_eid, **kwargs): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
256 |
if subj_eid is None or obj_eid is None: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
257 |
return |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
258 |
# XXX Could subjtype be inferred ? |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
259 |
self.source.add_relation(self._cnx, subj_eid, rtype, obj_eid, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
260 |
self.rschema(rtype).inlined, **kwargs) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
261 |
if self.rschema(rtype).symmetric: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
262 |
self.source.add_relation(self._cnx, obj_eid, rtype, subj_eid, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
263 |
self.rschema(rtype).inlined, **kwargs) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
264 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
265 |
def drop_indexes(self, etype): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
266 |
"""Drop indexes for a given entity type""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
267 |
if etype not in self.indexes_etypes: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
268 |
cu = self._cnx.cnxset.cu |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
269 |
def index_to_attr(index): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
270 |
"""turn an index name to (database) attribute name""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
271 |
return index.replace(etype.lower(), '').replace('idx', '').strip('_') |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
272 |
indices = [(index, index_to_attr(index)) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
273 |
for index in self.source.dbhelper.list_indices(cu, etype) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
274 |
# Do not consider 'cw_etype_pkey' index |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
275 |
if not index.endswith('key')] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
276 |
self.indexes_etypes[etype] = indices |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
277 |
for index, attr in self.indexes_etypes[etype]: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
278 |
self._cnx.system_sql('DROP INDEX %s' % index) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
279 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
280 |
def create_indexes(self, etype): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
281 |
"""Recreate indexes for a given entity type""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
282 |
for index, attr in self.indexes_etypes.get(etype, []): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
283 |
sql = 'CREATE INDEX %s ON cw_%s(%s)' % (index, etype, attr) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
284 |
self._cnx.system_sql(sql) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
285 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
286 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
287 |
########################################################################### |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
288 |
## SQL Source ############################################################# |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
289 |
########################################################################### |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
290 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
291 |
class SQLGenSourceWrapper(object): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
292 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
293 |
def __init__(self, system_source, schema, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
294 |
dump_output_dir=None, nb_threads_statement=3): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
295 |
self.system_source = system_source |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
296 |
self._sql = threading.local() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
297 |
# Explicitely backport attributes from system source |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
298 |
self._storage_handler = self.system_source._storage_handler |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
299 |
self.preprocess_entity = self.system_source.preprocess_entity |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
300 |
self.sqlgen = self.system_source.sqlgen |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
301 |
self.uri = self.system_source.uri |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
302 |
self.eid = self.system_source.eid |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
303 |
# Directory to write temporary files |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
304 |
self.dump_output_dir = dump_output_dir |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
305 |
# Allow to execute code with SQLite backend that does |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
306 |
# not support (yet...) copy_from |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
307 |
# XXX Should be dealt with in logilab.database |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
308 |
spcfrom = system_source.dbhelper.dbapi_module.support_copy_from |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
309 |
self.support_copy_from = spcfrom |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
310 |
self.dbencoding = system_source.dbhelper.dbencoding |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
311 |
self.nb_threads_statement = nb_threads_statement |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
312 |
# initialize thread-local data for main thread |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
313 |
self.init_thread_locals() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
314 |
self._inlined_rtypes_cache = {} |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
315 |
self._fill_inlined_rtypes_cache(schema) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
316 |
self.schema = schema |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
317 |
self.do_fti = False |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
318 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
319 |
def _fill_inlined_rtypes_cache(self, schema): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
320 |
cache = self._inlined_rtypes_cache |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
321 |
for eschema in schema.entities(): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
322 |
for rschema in eschema.ordered_relations(): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
323 |
if rschema.inlined: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
324 |
cache[eschema.type] = SQL_PREFIX + rschema.type |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
325 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
326 |
def init_thread_locals(self): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
327 |
"""initializes thread-local data""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
328 |
self._sql.entities = defaultdict(list) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
329 |
self._sql.relations = {} |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
330 |
self._sql.inlined_relations = {} |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
331 |
# keep track, for each eid of the corresponding data dict |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
332 |
self._sql.eid_insertdicts = {} |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
333 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
334 |
def flush(self): |
10589
7c23b7de2b8d
[py3k] print function
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
10513
diff
changeset
|
335 |
print('starting flush') |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
336 |
_entities_sql = self._sql.entities |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
337 |
_relations_sql = self._sql.relations |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
338 |
_inlined_relations_sql = self._sql.inlined_relations |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
339 |
_insertdicts = self._sql.eid_insertdicts |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
340 |
try: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
341 |
# try, for each inlined_relation, to find if we're also creating |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
342 |
# the host entity (i.e. the subject of the relation). |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
343 |
# In that case, simply update the insert dict and remove |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
344 |
# the need to make the |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
345 |
# UPDATE statement |
10662
10942ed172de
[py3k] dict.iteritems → dict.items
Rémi Cardona <remi.cardona@logilab.fr>
parents:
10613
diff
changeset
|
346 |
for statement, datalist in _inlined_relations_sql.items(): |
10513
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
347 |
new_datalist = [] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
348 |
# for a given inlined relation, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
349 |
# browse each couple to be inserted |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
350 |
for data in datalist: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
351 |
keys = list(data) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
352 |
# For inlined relations, it exists only two case: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
353 |
# (rtype, cw_eid) or (cw_eid, rtype) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
354 |
if keys[0] == 'cw_eid': |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
355 |
rtype = keys[1] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
356 |
else: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
357 |
rtype = keys[0] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
358 |
updated_eid = data['cw_eid'] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
359 |
if updated_eid in _insertdicts: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
360 |
_insertdicts[updated_eid][rtype] = data[rtype] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
361 |
else: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
362 |
# could not find corresponding insert dict, keep the |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
363 |
# UPDATE query |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
364 |
new_datalist.append(data) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
365 |
_inlined_relations_sql[statement] = new_datalist |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
366 |
_import_statements(self.system_source.get_connection, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
367 |
_entities_sql.items() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
368 |
+ _relations_sql.items() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
369 |
+ _inlined_relations_sql.items(), |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
370 |
dump_output_dir=self.dump_output_dir, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
371 |
nb_threads=self.nb_threads_statement, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
372 |
support_copy_from=self.support_copy_from, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
373 |
encoding=self.dbencoding) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
374 |
finally: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
375 |
_entities_sql.clear() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
376 |
_relations_sql.clear() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
377 |
_insertdicts.clear() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
378 |
_inlined_relations_sql.clear() |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
379 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
380 |
def add_relation(self, cnx, subject, rtype, object, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
381 |
inlined=False, **kwargs): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
382 |
if inlined: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
383 |
_sql = self._sql.inlined_relations |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
384 |
data = {'cw_eid': subject, SQL_PREFIX + rtype: object} |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
385 |
subjtype = kwargs.get('subjtype') |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
386 |
if subjtype is None: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
387 |
# Try to infer it |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
388 |
targets = [t.type for t in |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
389 |
self.schema.rschema(rtype).subjects()] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
390 |
if len(targets) == 1: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
391 |
subjtype = targets[0] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
392 |
else: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
393 |
raise ValueError('You should give the subject etype for ' |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
394 |
'inlined relation %s' |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
395 |
', as it cannot be inferred: ' |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
396 |
'this type is given as keyword argument ' |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
397 |
'``subjtype``'% rtype) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
398 |
statement = self.sqlgen.update(SQL_PREFIX + subjtype, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
399 |
data, ['cw_eid']) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
400 |
else: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
401 |
_sql = self._sql.relations |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
402 |
data = {'eid_from': subject, 'eid_to': object} |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
403 |
statement = self.sqlgen.insert('%s_relation' % rtype, data) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
404 |
if statement in _sql: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
405 |
_sql[statement].append(data) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
406 |
else: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
407 |
_sql[statement] = [data] |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
408 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
409 |
def add_entity(self, cnx, entity): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
410 |
with self._storage_handler(entity, 'added'): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
411 |
attrs = self.preprocess_entity(entity) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
412 |
rtypes = self._inlined_rtypes_cache.get(entity.cw_etype, ()) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
413 |
if isinstance(rtypes, str): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
414 |
rtypes = (rtypes,) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
415 |
for rtype in rtypes: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
416 |
if rtype not in attrs: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
417 |
attrs[rtype] = None |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
418 |
sql = self.sqlgen.insert(SQL_PREFIX + entity.cw_etype, attrs) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
419 |
self._sql.eid_insertdicts[entity.eid] = attrs |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
420 |
self._append_to_entities(sql, attrs) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
421 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
422 |
def _append_to_entities(self, sql, attrs): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
423 |
self._sql.entities[sql].append(attrs) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
424 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
425 |
def _handle_insert_entity_sql(self, cnx, sql, attrs): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
426 |
# We have to overwrite the source given in parameters |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
427 |
# as here, we directly use the system source |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
428 |
attrs['asource'] = self.system_source.uri |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
429 |
self._append_to_entities(sql, attrs) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
430 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
431 |
def _handle_is_relation_sql(self, cnx, sql, attrs): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
432 |
self._append_to_entities(sql, attrs) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
433 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
434 |
def _handle_is_instance_of_sql(self, cnx, sql, attrs): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
435 |
self._append_to_entities(sql, attrs) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
436 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
437 |
def _handle_source_relation_sql(self, cnx, sql, attrs): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
438 |
self._append_to_entities(sql, attrs) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
439 |
|
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
440 |
# add_info is _copypasted_ from the one in NativeSQLSource. We want it |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
441 |
# there because it will use the _handlers of the SQLGenSourceWrapper, which |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
442 |
# are not like the ones in the native source. |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
443 |
def add_info(self, cnx, entity, source, extid): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
444 |
"""add type and source info for an eid into the system table""" |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
445 |
# begin by inserting eid/type/source/extid into the entities table |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
446 |
if extid is not None: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
447 |
assert isinstance(extid, str) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
448 |
extid = b64encode(extid) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
449 |
attrs = {'type': entity.cw_etype, 'eid': entity.eid, 'extid': extid, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
450 |
'asource': source.uri} |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
451 |
self._handle_insert_entity_sql(cnx, self.sqlgen.insert('entities', attrs), attrs) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
452 |
# insert core relations: is, is_instance_of and cw_source |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
453 |
try: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
454 |
self._handle_is_relation_sql(cnx, 'INSERT INTO is_relation(eid_from,eid_to) VALUES (%s,%s)', |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
455 |
(entity.eid, eschema_eid(cnx, entity.e_schema))) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
456 |
except IndexError: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
457 |
# during schema serialization, skip |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
458 |
pass |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
459 |
else: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
460 |
for eschema in entity.e_schema.ancestors() + [entity.e_schema]: |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
461 |
self._handle_is_relation_sql(cnx, |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
462 |
'INSERT INTO is_instance_of_relation(eid_from,eid_to) VALUES (%s,%s)', |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
463 |
(entity.eid, eschema_eid(cnx, eschema))) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
464 |
if 'CWSource' in self.schema and source.eid is not None: # else, cw < 3.10 |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
465 |
self._handle_is_relation_sql(cnx, 'INSERT INTO cw_source_relation(eid_from,eid_to) VALUES (%s,%s)', |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
466 |
(entity.eid, source.eid)) |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
467 |
# now we can update the full text index |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
468 |
if self.do_fti and self.need_fti_indexation(entity.cw_etype): |
7bec01a59f92
[dataimport] dispatch and deprecate old code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
469 |
self.index_entity(cnx, entity=entity) |