# coding: utf-8
# copyright 2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of CubicWeb.
#
# CubicWeb is free software: you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>.

import logging
from datetime import datetime
from collections import defaultdict
from io import StringIO

from six.moves import range

from yams.constraints import SizeConstraint

from psycopg2 import ProgrammingError

from cubicweb.server.schema2sql import rschema_has_table
from cubicweb.schema import PURE_VIRTUAL_RTYPES
from cubicweb.dataimport import stores, pgstore
from cubicweb.utils import make_uid
from cubicweb.server.sqlutils import SQL_PREFIX


class MassiveObjectStore(stores.RQLObjectStore):
    """
    Store for massive import of data, with delayed insertion of metadata.

    WARNINGS:
    - This store may only be used with PostgreSQL for now, as it relies
      on the COPY FROM method, and on specific PostgreSQL tables to get all
      the indexes.
    - This store can only insert relations that are not inlined (i.e.,
      which do *not* have inlined=True in their definition in the schema).

    It should be used as follows:

        store = MassiveObjectStore(cnx)
        store.init_rtype_table('Person', 'lives_in', 'Location')
        ...

        store.prepare_insert_entity('Person', subj_iid_attribute=person_iid, ...)
        store.prepare_insert_entity('Location', obj_iid_attribute=location_iid, ...)
        ...

        # subj_iid_attribute and obj_iid_attribute are argument names
        # chosen by the user (e.g. "cwuri"). These names can be identical.
        # person_iid and location_iid are unique IDs and depend on the data
        # (e.g. URI).
        store.flush()
        store.relate_by_iid(person_iid, 'lives_in', location_iid)
        # For example:
        store.prepare_insert_entity('Person',
                                    cwuri='http://dbpedia.org/toto',
                                    name='Toto')
        store.prepare_insert_entity('Location',
                                    uri='http://geonames.org/11111',
                                    name='Somewhere')
        store.flush()
        store.relate_by_iid('http://dbpedia.org/toto',
                            'lives_in',
                            'http://geonames.org/11111')
        # Finally
        store.convert_relations('Person', 'lives_in', 'Location',
                                'subj_iid_attribute', 'obj_iid_attribute')
        # For the previous example:
        store.convert_relations('Person', 'lives_in', 'Location', 'cwuri', 'uri')
        ...
        store.commit()
        store.finish()
    """
    # size of eid range reserved by the store for each batch
    eids_seq_range = 10000
    # initial eid (None means use the value in the db)
    eids_seq_start = None
    # max size of the iid, used to create the iid_eid conversion table
    iid_maxsize = 1024
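
    # Added commentary (illustrative sketch, not part of the original module):
    # the three class attributes above can be tuned by subclassing the store,
    # for example:
    #
    #     class TunedMassiveStore(MassiveObjectStore):  # hypothetical subclass
    #         eids_seq_range = 100000  # reserve larger eid batches
    #         iid_maxsize = 2048       # allow longer external identifiers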

    def __init__(self, cnx,
                 on_commit_callback=None, on_rollback_callback=None,
                 slave_mode=False,
                 source=None):
        """ Create a MassiveObject store, with the following attributes:

        - cnx: CubicWeb cnx
        """
        super(MassiveObjectStore, self).__init__(cnx)
        self.logger = logging.getLogger('dataimport.massive_store')
        self._cnx = cnx
        self.sql = cnx.system_sql
        self._data_uri_relations = defaultdict(list)

        # etypes for which we have a uri_eid_%(etype)s table
        self._init_uri_eid = set()
        # etypes for which we have a uri_eid_%(e)s_idx index
        self._uri_eid_inserted = set()
        # set of rtypes for which we have a %(rtype)s_relation_iid_tmp table
        self._uri_rtypes = set()
        # set of etypes whose tables are created
        self._entities = set()
        # set of rtypes for which we have a %(rtype)s_relation_tmp table
        self._rtypes = set()

        self.slave_mode = slave_mode
        self.default_values = get_default_values(cnx.vreg.schema)
        pg_schema = cnx.repo.config.system_source_config.get('db-namespace') or 'public'
        self._dbh = PGHelper(self._cnx, pg_schema)
        self._data_entities = defaultdict(list)
        self._data_relations = defaultdict(list)
        self._now = datetime.now()
        self._default_cwuri = make_uid('_auto_generated')
        self._count_cwuri = 0
        self.on_commit_callback = on_commit_callback
        self.on_rollback_callback = on_rollback_callback
        # Do our meta tables already exist?
        self._init_massive_metatables()
        # Internal markers of initialization
        if self.eids_seq_start is not None and not self.slave_mode:
            self._cnx.system_sql(self._cnx.repo.system_source.dbhelper.sql_restart_numrange(
                'entities_id_seq', initial_value=self.eids_seq_start + 1))
            cnx.commit()
        self.get_next_eid = lambda g=self._get_eid_gen(): next(g)
        # recreate them when self.finish() is called
        if not self.slave_mode:
            self._drop_all_constraints()
            self._drop_metatables_constraints()
        if source is None:
            source = cnx.repo.system_source
        self.source = source
        self._etype_eid_idx = dict(cnx.execute('Any XN,X WHERE X is CWEType, X name XN'))
        cnx.read_security = False
        cnx.write_security = False

    ### INIT FUNCTIONS ########################################################
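
    # Added commentary (inferred from the code below and the restore step in
    # finish()): integrity constraints on the cw_<etype> entity tables, the
    # <rtype>_relation tables and the entities table are dropped up front so
    # that bulk loading is not slowed down by constraint checking; the dropped
    # constraints are saved in the cwmassive_constraints table and restored
    # once the import is finished.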

    def _drop_all_constraints(self):
        schema = self._cnx.vreg.schema
        tables = ['cw_%s' % etype.type.lower()
                  for etype in schema.entities() if not etype.final]
        for rschema in schema.relations():
            if rschema.inlined:
                continue
            elif rschema_has_table(rschema, skip_relations=PURE_VIRTUAL_RTYPES):
                tables.append('%s_relation' % rschema.type.lower())
        tables.append('entities')
        for tablename in tables:
            self._store_and_drop_constraints(tablename)
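
    # Added commentary: each dropped constraint is recorded as a
    # (origtable, query, type) row in cwmassive_constraints (created below), so
    # the information survives in the database itself and an import interrupted
    # by a crash can still restore its constraints later through
    # reapply_all_constraints().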

    def _store_and_drop_constraints(self, tablename):
        if not self._constraint_table_created:
            # Create a table to save the constraints
            # Allow reload even after crash
            sql = "CREATE TABLE cwmassive_constraints (origtable text, query text, type varchar(256))"
            self.sql(sql)
            self._constraint_table_created = True
        constraints = self._dbh.application_constraints(tablename)
        for name, query in constraints.items():
            sql = 'INSERT INTO cwmassive_constraints VALUES (%(e)s, %(c)s, %(t)s)'
            self.sql(sql, {'e': tablename, 'c': query, 't': 'constraint'})
            sql = 'ALTER TABLE %s DROP CONSTRAINT %s' % (tablename, name)
            self.sql(sql)

    def reapply_all_constraints(self):
        if not self._dbh.table_exists('cwmassive_constraints'):
            self.logger.info('The table cwmassive_constraints does not exist')
            return
        sql = 'SELECT query FROM cwmassive_constraints WHERE type = %(t)s'
        crs = self.sql(sql, {'t': 'constraint'})
        for query, in crs.fetchall():
            self.sql(query)
            self.sql('DELETE FROM cwmassive_constraints WHERE type = %(t)s '
                     'AND query = %(q)s', {'t': 'constraint', 'q': query})
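
    # Added commentary: init_rtype_table() below is the entry point of the
    # uri/iid-based relation workflow described in the class docstring; it
    # creates the <rtype>_relation_iid_tmp table (filled by flush_relations())
    # and the uri_eid_<etype> tables (filled by fill_uri_eid_table()) that
    # convert_relations() later joins to resolve eids.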
    def init_rtype_table(self, etype_from, rtype, etype_to):
        """ Build temporary table for standard rtype """
        # Create a uri_eid table for each etype for a better
        # control of which etype is concerned by a particular
        # possibly multivalued relation.
        for etype in (etype_from, etype_to):
            if etype and etype not in self._init_uri_eid:
                self._init_uri_eid_table(etype)
        if rtype not in self._uri_rtypes:
            # Create the temporary table
            if not self._cnx.repo.schema.rschema(rtype).inlined:
                try:
                    sql = 'CREATE TABLE %(r)s_relation_iid_tmp (uri_from character ' \
                          'varying(%(s)s), uri_to character varying(%(s)s))'
                    self.sql(sql % {'r': rtype, 's': self.iid_maxsize})
                except ProgrammingError:
                    # XXX Already exists (probably due to multiple import)
                    pass
            else:
                self.logger.warning("inlined relation %s: cannot insert it", rtype)
            # Add it to the initialized set
            self._uri_rtypes.add(rtype)

    def _init_uri_eid_table(self, etype):
        """ Build a temporary table for id/eid conversion
        """
        try:
            sql = "CREATE TABLE uri_eid_%(e)s (uri character varying(%(size)s), eid integer)"
            self.sql(sql % {'e': etype.lower(), 'size': self.iid_maxsize})
        except ProgrammingError:
            # XXX Already exists (probably due to multiple import)
            pass
        # Add it to the initialized set
        self._init_uri_eid.add(etype)

    def _init_massive_metatables(self):
        # Check whether our tables were already created (i.e. this is a restart)
        self._initialized_table_created = self._dbh.table_exists('cwmassive_initialized')
        self._constraint_table_created = self._dbh.table_exists('cwmassive_constraints')
        self._metadata_table_created = self._dbh.table_exists('cwmassive_metadata')

    ### RELATE FUNCTION #######################################################

    def relate_by_iid(self, iid_from, rtype, iid_to):
        """Add a new relation based on the internal id (iid)
        of the entities (not the eid)"""
        # Push data
        if isinstance(iid_from, unicode):
            iid_from = iid_from.encode('utf-8')
        if isinstance(iid_to, unicode):
            iid_to = iid_to.encode('utf-8')
        self._data_uri_relations[rtype].append({'uri_from': iid_from, 'uri_to': iid_to})
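
    # Added commentary: relate_by_iid() only buffers the (iid_from, iid_to)
    # pair in self._data_uri_relations; nothing is written to the database
    # until flush_relations() copies the buffer into <rtype>_relation_iid_tmp.
    # Illustrative call, taken from the class docstring:
    #
    #     store.relate_by_iid('http://dbpedia.org/toto', 'lives_in',
    #                         'http://geonames.org/11111')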

    ### FLUSH FUNCTIONS #######################################################

    def flush_relations(self):
        """ Flush the relations data
        """
        for rtype, data in self._data_uri_relations.items():
            if not data:
                self.logger.info('No data for rtype %s', rtype)
            buf = StringIO('\n'.join(['%(uri_from)s\t%(uri_to)s' % d for d in data]))
            if not buf:
                self.logger.info('Empty Buffer for rtype %s', rtype)
                continue
            cursor = self._cnx.cnxset.cu
            if not self._cnx.repo.schema.rschema(rtype).inlined:
                cursor.copy_from(buf, '%s_relation_iid_tmp' % rtype.lower(),
                                 null='NULL', columns=('uri_from', 'uri_to'))
            else:
                self.logger.warning("inlined relation %s: cannot insert it", rtype)
            buf.close()
            # Clear data cache
            self._data_uri_relations[rtype] = []
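
    # Added commentary: the buffer handed to copy_from() above is plain
    # tab-separated text, one "<uri_from>\t<uri_to>" pair per line, which
    # PostgreSQL's COPY FROM loads into <rtype>_relation_iid_tmp
    # (columns uri_from, uri_to) in a single statement.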

    def fill_uri_eid_table(self, etype, uri_label):
        """ Fill the uri_eid table
        """
        self.logger.info('Fill uri_eid for etype %s', etype)
        sql = 'INSERT INTO uri_eid_%(e)s SELECT cw_%(l)s, cw_eid FROM cw_%(e)s'
        self.sql(sql % {'l': uri_label, 'e': etype.lower()})
        # Add indexes
        self.sql('CREATE INDEX uri_eid_%(e)s_idx ON uri_eid_%(e)s(uri)' % {'e': etype.lower()})
        # Set the etype as converted
        self._uri_eid_inserted.add(etype)
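
    # Added commentary: convert_relations() below resolves the buffered
    # (uri_from, uri_to) pairs into (eid_from, eid_to) rows by joining
    # <rtype>_relation_iid_tmp against the uri_eid_<etype> tables filled by
    # fill_uri_eid_table(), skipping pairs already present in <rtype>_relation.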
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
273 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
274 |
def convert_relations(self, etype_from, rtype, etype_to, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
                          uri_label_from='cwuri', uri_label_to='cwuri'):
        """ Flush the converted relations
        """
        # Always flush relations to be sure
        self.logger.info('Convert relations %s %s %s', etype_from, rtype, etype_to)
        self.flush_relations()
        if uri_label_from and etype_from not in self._uri_eid_inserted:
            self.fill_uri_eid_table(etype_from, uri_label_from)
        if uri_label_to and etype_to not in self._uri_eid_inserted:
            self.fill_uri_eid_table(etype_to, uri_label_to)
        if self._cnx.repo.schema.rschema(rtype).inlined:
            self.logger.warning("Can't insert inlined relation %s", rtype)
            return
        if uri_label_from and uri_label_to:
            sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT O1.eid, O2.eid
                     FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(ef)s as O1, uri_eid_%(et)s as O2
                     WHERE O1.uri=T.uri_from AND O2.uri=T.uri_to AND NOT EXISTS (
                     SELECT 1 FROM %(r)s_relation AS TT WHERE TT.eid_from=O1.eid AND TT.eid_to=O2.eid);
                     '''
        elif uri_label_to:
            sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT
                     CAST(T.uri_from AS INTEGER), O1.eid
                     FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(et)s as O1
                     WHERE O1.uri=T.uri_to AND NOT EXISTS (
                     SELECT 1 FROM %(r)s_relation AS TT WHERE
                     TT.eid_from=CAST(T.uri_from AS INTEGER) AND TT.eid_to=O1.eid);
                     '''
        elif uri_label_from:
            sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT
                     O1.eid, CAST(T.uri_to AS INTEGER)
                     FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(ef)s as O1
                     WHERE O1.uri=T.uri_from AND NOT EXISTS (
                     SELECT 1 FROM %(r)s_relation AS TT WHERE
                     TT.eid_from=O1.eid AND TT.eid_to=CAST(T.uri_to AS INTEGER));
                     '''
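        # The placeholders are filled just below: %(r)s with the lowercased relation
        # type, %(ef)s / %(et)s with the lowercased source / target entity types.
        # For instance (hypothetical schema), rtype='knows' with etype_from='Person'
        # would target the knows_relation, knows_relation_iid_tmp and
        # uri_eid_person tables.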
        try:
            self.sql(sql % {'r': rtype.lower(),
                            'et': etype_to.lower() if etype_to else u'',
                            'ef': etype_from.lower() if etype_from else u''})
        except Exception as ex:
            self.logger.error("Can't insert relation %s: %s", rtype, ex)

    ### SQL UTILITIES #########################################################

    def drop_and_store_indexes_constraints(self, tablename):
        # Drop indexes and constraints
        if not self._constraint_table_created:
            # Create a table to save the constraints
            # Allow reload even after crash
            sql = "CREATE TABLE cwmassive_constraints (origtable text, query text, type varchar(256))"
            self.sql(sql)
            self._constraint_table_created = True
        self._drop_table_constraints_indexes(tablename)

    def _drop_table_constraints_indexes(self, tablename):
        """ Drop and store table constraints and indexes """
        indexes, constraints = self._dbh.application_indexes_constraints(tablename)
        for name, query in constraints.items():
            sql = 'INSERT INTO cwmassive_constraints VALUES (%(e)s, %(c)s, %(t)s)'
            self.sql(sql, {'e': tablename, 'c': query, 't': 'constraint'})
            sql = 'ALTER TABLE %s DROP CONSTRAINT %s' % (tablename, name)
            self.sql(sql)
        for name, query in indexes.items():
            sql = 'INSERT INTO cwmassive_constraints VALUES (%(e)s, %(c)s, %(t)s)'
            self.sql(sql, {'e': tablename, 'c': query, 't': 'index'})
            sql = 'DROP INDEX %s' % name
            self.sql(sql)

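    # The constraints and indexes dropped above are saved as plain SQL queries in
    # the cwmassive_constraints table; reapply_constraint_index() below replays the
    # saved queries for a given table and removes them once they are restored.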
    def reapply_constraint_index(self, tablename):
        if not self._dbh.table_exists('cwmassive_constraints'):
            self.logger.info('The table cwmassive_constraints does not exist')
            return
        sql = 'SELECT query FROM cwmassive_constraints WHERE origtable = %(e)s'
        crs = self.sql(sql, {'e': tablename})
        for query, in crs.fetchall():
            self.sql(query)
            self.sql('DELETE FROM cwmassive_constraints WHERE origtable = %(e)s '
                     'AND query = %(q)s', {'e': tablename, 'q': query})

    def _drop_metatables_constraints(self):
        """ Drop all the constraints for the meta data"""
        for tablename in ('created_by_relation', 'owned_by_relation',
                          'is_instance_of_relation', 'is_relation',
                          'entities'):
            self.drop_and_store_indexes_constraints(tablename)

    def _create_metatables_constraints(self):
        """ Create all the constraints for the meta data"""
        for tablename in ('entities',
                          'created_by_relation', 'owned_by_relation',
                          'is_instance_of_relation', 'is_relation'):
            # Indexes and constraints
            self.reapply_constraint_index(tablename)

    def init_relation_table(self, rtype):
        """ Get and remove all indexes for performance sake """
        # Create temporary table
        if not self.slave_mode and rtype not in self._rtypes:
            sql = "CREATE TABLE %s_relation_tmp (eid_from integer, eid_to integer)" % rtype.lower()
            self.sql(sql)
            # Drop indexes and constraints
            tablename = '%s_relation' % rtype.lower()
            self.drop_and_store_indexes_constraints(tablename)
            # Push the rtype in the initialized table for easier restart
            self.init_create_initialized_table()
            sql = 'INSERT INTO cwmassive_initialized VALUES (%(e)s, %(t)s)'
            self.sql(sql, {'e': rtype, 't': 'rtype'})
            # Mark rtype as "initialized" for faster check
            self._rtypes.add(rtype)

    def init_create_initialized_table(self):
        """ Create the cwmassive initialized table
        """
        if not self._initialized_table_created:
            sql = "CREATE TABLE cwmassive_initialized (retype text, type varchar(128))"
            self.sql(sql)
            self._initialized_table_created = True

    def init_etype_table(self, etype):
        """ Add eid sequence to a particular etype table and
        remove all indexes for performance sake """
        if etype not in self._entities:
            # Only for non-initialized etype and not slave mode store
            if not self.slave_mode:
                if self.eids_seq_range is None:
                    # Eids are directly set by the entities_id_seq.
                    # We attach this sequence to all the created etypes.
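                    # For a hypothetical 'Person' etype this issues:
                    #   ALTER TABLE cw_person ALTER COLUMN cw_eid
                    #   SET DEFAULT nextval('entities_id_seq')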
                    sql = ("ALTER TABLE cw_%s ALTER COLUMN cw_eid "
                           "SET DEFAULT nextval('entities_id_seq')" % etype.lower())
                    self.sql(sql)
                # Drop indexes and constraints
                tablename = 'cw_%s' % etype.lower()
                self.drop_and_store_indexes_constraints(tablename)
                # Push the etype in the initialized table for easier restart
                self.init_create_initialized_table()
                sql = 'INSERT INTO cwmassive_initialized VALUES (%(e)s, %(t)s)'
                self.sql(sql, {'e': etype, 't': 'etype'})
            # Mark etype as "initialized" for faster check
            self._entities.add(etype)


    ### ENTITIES CREATION #####################################################

    def _get_eid_gen(self):
        """ Function getting the next eid. This is done by preselecting
        a given number of eids from the 'entities_id_seq', and then
        storing them"""
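        # e.g. with eids_seq_range = 1000, each call to create_eid() reserves a
        # block of 1000 eids at once; if it returns last_eid = 5000, the loop
        # below yields eids 4001 through 5000.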
        while True:
            last_eid = self._cnx.repo.system_source.create_eid(self._cnx, self.eids_seq_range)
            for eid in range(last_eid - self.eids_seq_range + 1, last_eid + 1):
                yield eid

    def _apply_default_values(self, etype, kwargs):
        """Apply the default values for a given etype, attribute and value."""
        default_values = self.default_values[etype]
        missing_keys = set(default_values) - set(kwargs)
        kwargs.update((key, default_values[key]) for key in missing_keys)

    # store api ################################################################

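    # Minimal usage sketch of the store API below, assuming a store built from an
    # open connection (e.g. store = MassiveObjectStore(cnx)) and a hypothetical
    # schema with a 'Person' etype and a 'knows' rtype:
    #
    #     eid1 = store.prepare_insert_entity('Person', name=u'Alice')
    #     eid2 = store.prepare_insert_entity('Person', name=u'Bob')
    #     store.prepare_insert_relation(eid1, 'knows', eid2)
    #     store.flush()
    #     store.commit()
    #     store.finish()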
    def prepare_insert_entity(self, etype, **kwargs):
        """Given an entity type, attributes and inlined relations, returns the inserted entity's
        eid.
        """
        # Init the table if necessary
        self.init_etype_table(etype)
        # Add meta data if not given
        if 'modification_date' not in kwargs:
            kwargs['modification_date'] = self._now
        if 'creation_date' not in kwargs:
            kwargs['creation_date'] = self._now
        if 'cwuri' not in kwargs:
            kwargs['cwuri'] = self._default_cwuri + str(self._count_cwuri)
            self._count_cwuri += 1
        if 'eid' not in kwargs and self.eids_seq_range is not None:
            # If eid is not given and the eids sequence is set,
            # use the value from the sequence
            kwargs['eid'] = self.get_next_eid()
        self._apply_default_values(etype, kwargs)
        self._data_entities[etype].append(kwargs)
        return kwargs.get('eid')

    def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
        """Insert into the database a relation ``rtype`` between entities with eids ``eid_from``
        and ``eid_to``.
        """
        # Init the table if necessary
        self.init_relation_table(rtype)
        self._data_relations[rtype].append({'eid_from': eid_from, 'eid_to': eid_to})

    def flush(self):
        """Flush the data"""
        self.flush_entities()
        self.flush_internal_relations()
        self.flush_relations()

    def commit(self):
        """Commit the database transaction."""
        self.on_commit()
        super(MassiveObjectStore, self).commit()

    def finish(self):
        """Remove temporary tables and columns."""
        if self.slave_mode:
            raise RuntimeError('Store cleanup is not allowed in slave mode')
        self.logger.info("Start cleaning")
        # Cleanup relations tables
        for etype in self._init_uri_eid:
            self.sql('DROP TABLE uri_eid_%s' % etype.lower())
        # Remove relations tables
        for rtype in self._uri_rtypes:
            if not self._cnx.repo.schema.rschema(rtype).inlined:
                self.sql('DROP TABLE %(r)s_relation_iid_tmp' % {'r': rtype})
            else:
                self.logger.warning("inlined relation %s: no cleanup to be done for it" % rtype)
        # Create meta constraints (entities, is_instance_of, ...)
        self._create_metatables_constraints()
        # Get all the initialized etypes/rtypes
        if self._dbh.table_exists('cwmassive_initialized'):
            crs = self.sql('SELECT retype, type FROM cwmassive_initialized')
            for retype, _type in crs.fetchall():
                self.logger.info('Cleanup for %s' % retype)
                if _type == 'etype':
                    # Cleanup entities tables - Recreate indexes
                    self._cleanup_entities(retype)
                elif _type == 'rtype':
                    # Cleanup relations tables
                    self._cleanup_relations(retype)
                self.sql('DELETE FROM cwmassive_initialized WHERE retype = %(e)s',
                         {'e': retype})
        self.reapply_all_constraints()
        # Delete the meta data table
        for table_name in ('cwmassive_initialized', 'cwmassive_constraints', 'cwmassive_metadata'):
            if self._dbh.table_exists(table_name):
                self.sql('DROP TABLE %s' % table_name)
        self.commit()

    ### FLUSH #################################################################

    def on_commit(self):
        if self.on_commit_callback:
            self.on_commit_callback()

    def on_rollback(self, exc, etype, data):
        if self.on_rollback_callback:
            self.on_rollback_callback(exc, etype, data)
            self._cnx.rollback()
        else:
            raise exc

    def flush_internal_relations(self):
        """ Flush the relations data
        """
        for rtype, data in self._data_relations.items():
            if not data:
                # There is no data for this rtype for this flush round.
                continue
            buf = pgstore._create_copyfrom_buffer(data, ('eid_from', 'eid_to'))
            if not buf:
                # The buffer is empty. This is probably due to error in _create_copyfrom_buffer
                raise ValueError('Error in buffer creation for rtype %s' % rtype)
            cursor = self._cnx.cnxset.cu
            # Push into the tmp table
            cursor.copy_from(buf, '%s_relation_tmp' % rtype.lower(),
                             null='NULL', columns=('eid_from', 'eid_to'))
            # Clear data cache
            self._data_relations[rtype] = []

    def flush_entities(self):
        """ Flush the entities data
        """
        for etype, data in self._data_entities.items():
            if not data:
                # There is no data for this etype for this flush round.
                continue
            # XXX It may be interesting to directly infer the columns'
            # names from the schema instead of using .keys()
            # XXX For now, the _create_copyfrom_buffer does a "row[column]"
            # which can lead to a key error.
            # Thus we create dictionaries holding all the keys.
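            # e.g. rows {'eid': 1, 'name': u'x'} and {'eid': 2} are both padded to
            # the full column set, the missing 'name' becoming None (written as NULL).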
            columns = set()
            for d in data:
                columns.update(d.keys())
            _data = []
            _base_data = dict.fromkeys(columns)
            for d in data:
                _d = _base_data.copy()
                _d.update(d)
                _data.append(_d)
            buf = pgstore._create_copyfrom_buffer(_data, columns)
            if not buf:
                # The buffer is empty. This is probably due to error in _create_copyfrom_buffer
                raise ValueError('Error in buffer creation for etype %s' % etype)
            columns = ['cw_%s' % attr for attr in columns]
            cursor = self._cnx.cnxset.cu
            try:
                cursor.copy_from(buf, 'cw_%s' % etype.lower(), null='NULL', columns=columns)
            except Exception as exc:
                self.on_rollback(exc, etype, data)
            # Clear data cache
            self._data_entities[etype] = []
        self.flush_meta_data()

580 |
def flush_meta_data(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
581 |
""" Flush the meta data (entities table, is_instance table, ...) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
582 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
583 |
if self.slave_mode: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
584 |
raise RuntimeError('Flushing meta data is not allow in slave mode') |
10878
fda5e42037a9
[dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents:
10877
diff
changeset
|
585 |
if not self._dbh.table_exists('cwmassive_initialized'): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
586 |
self.logger.info('No information available for initialized etypes/rtypes') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
587 |
return |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
588 |
if not self._metadata_table_created: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
589 |
# Keep the correctly flush meta data in database |
10878
fda5e42037a9
[dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents:
10877
diff
changeset
|
590 |
sql = "CREATE TABLE cwmassive_metadata (etype text)" |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
591 |
self.sql(sql) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
592 |
self._metadata_table_created = True |
10878
fda5e42037a9
[dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents:
10877
diff
changeset
|
593 |
crs = self.sql('SELECT etype FROM cwmassive_metadata') |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
594 |
already_flushed = set(e for e, in crs.fetchall()) |
10878
fda5e42037a9
[dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents:
10877
diff
changeset
|
595 |
crs = self.sql('SELECT retype FROM cwmassive_initialized WHERE type = %(t)s', |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
596 |
{'t': 'etype'}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
597 |
all_etypes = set(e for e, in crs.fetchall()) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
598 |
for etype in all_etypes: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
599 |
if etype not in already_flushed: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
600 |
# Deals with meta data |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
601 |
self.logger.info('Flushing meta data for %s' % etype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
602 |
self.insert_massive_meta_data(etype) |
10878
fda5e42037a9
[dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents:
10877
diff
changeset
|
603 |
sql = 'INSERT INTO cwmassive_metadata VALUES (%(e)s)' |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
604 |
self.sql(sql, {'e': etype}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
605 |
|
    def _cleanup_entities(self, etype):
        """ Cleanup etype table """
        if self.eids_seq_range is None:
            # Remove DEFAULT eids sequence if added
            sql = 'ALTER TABLE cw_%s ALTER COLUMN cw_eid DROP DEFAULT;' % etype.lower()
            self.sql(sql)
        # Create indexes and constraints
        tablename = SQL_PREFIX + etype.lower()
        self.reapply_constraint_index(tablename)

    def _cleanup_relations(self, rtype):
        """ Cleanup rtype table """
        # Push into the relation table while removing duplicates
        sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT
                 T.eid_from, T.eid_to FROM %(r)s_relation_tmp AS T
                 WHERE NOT EXISTS (SELECT 1 FROM %(r)s_relation AS TT WHERE
                 TT.eid_from=T.eid_from AND TT.eid_to=T.eid_to);''' % {'r': rtype}
        self.sql(sql)
        # Drop the temporary relation table
        sql = ('DROP TABLE %(r)s_relation_tmp' % {'r': rtype.lower()})
        self.sql(sql)
        # Create indexes and constraints
        tablename = '%s_relation' % rtype.lower()
        self.reapply_constraint_index(tablename)

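    # For a hypothetical rtype 'knows', the deduplicating query above expands to:
    #
    #     INSERT INTO knows_relation (eid_from, eid_to) SELECT DISTINCT
    #     T.eid_from, T.eid_to FROM knows_relation_tmp AS T
    #     WHERE NOT EXISTS (SELECT 1 FROM knows_relation AS TT WHERE
    #     TT.eid_from=T.eid_from AND TT.eid_to=T.eid_to);
    #
    # i.e. rows from the temporary table are copied into the real relation
    # table only when the (eid_from, eid_to) pair is not already present.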
    def insert_massive_meta_data(self, etype):
        """ Massive insertion of meta data for a given etype, based on SQL statements.
        """
        # Push data - use coalesce to avoid NULL (and get 0) if there are no
        # entities of this type in the entities table.
        # Meta data relations
        self.metagen_push_relation(etype, self._cnx.user.eid, 'created_by_relation')
        self.metagen_push_relation(etype, self._cnx.user.eid, 'owned_by_relation')
        self.metagen_push_relation(etype, self.source.eid, 'cw_source_relation')
        self.metagen_push_relation(etype, self._etype_eid_idx[etype], 'is_relation')
        self.metagen_push_relation(etype, self._etype_eid_idx[etype], 'is_instance_of_relation')
        sql = ("INSERT INTO entities (eid, type, asource, extid) "
               "SELECT cw_eid, '%s', 'system', NULL FROM cw_%s "
               "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
               % (etype, etype.lower()))
        self.sql(sql)

    def metagen_push_relation(self, etype, eid_to, rtype):
        sql = ("INSERT INTO %s (eid_from, eid_to) SELECT cw_eid, %s FROM cw_%s "
               "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
               % (rtype, eid_to, etype.lower()))
        self.sql(sql)

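    # For a hypothetical etype 'Person' with rtype 'owned_by_relation' and
    # eid_to 5, metagen_push_relation() above generates and executes:
    #
    #     INSERT INTO owned_by_relation (eid_from, eid_to) SELECT cw_eid, 5 FROM cw_person
    #     WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)
    #
    # so the relation is only added for entities that are not yet registered
    # in the ``entities`` table, i.e. the ones freshly imported by the store.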

### CONSTRAINTS MANAGEMENT FUNCTIONS ##########################################

def get_size_constraints(schema):
    """analyzes the yams ``schema`` and returns the size constraints.

    The returned value is a dictionary mapping entity types to a
    sub-dictionary mapping attribute names -> max size.
    """
    size_constraints = {}
    # iterate over all entity types
    for eschema in schema.entities():
        # for each entity type, iterate over the attribute definitions
        size_constraints[eschema.type] = eschema_constraints = {}
        for rschema, aschema in eschema.attribute_definitions():
            # for each attribute, if a size constraint is found,
            # record it in the size constraints dict
            maxsize = None
            rdef = rschema.rdef(eschema, aschema)
            for constraint in rdef.constraints:
                if isinstance(constraint, SizeConstraint):
                    maxsize = constraint.max
            eschema_constraints[rschema.type] = maxsize
    return size_constraints

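# Example (hypothetical schema): for an entity type Person with a String
# attribute ``name`` constrained by SizeConstraint(max=64) and an Int
# attribute ``age`` with no size constraint, get_size_constraints(schema)
# would return:
#
#     {'Person': {'name': 64, 'age': None}}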
def get_default_values(schema):
    """analyzes the yams ``schema`` and returns the default values.

    The returned value is a dictionary mapping entity types to a
    sub-dictionary mapping attribute names -> default values.
    """
    default_values = {}
    # iterate over all entity types
    for eschema in schema.entities():
        # for each entity type, iterate over the attribute definitions
        default_values[eschema.type] = eschema_constraints = {}
        for rschema, _ in eschema.attribute_definitions():
            # for each attribute, if a default value is found,
            # record it in the default values dict
            if eschema.default(rschema.type) is not None:
                eschema_constraints[rschema.type] = eschema.default(rschema.type)
    return default_values

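# Example (hypothetical schema): if Person.age declares default=18 and no
# other attribute declares a default, get_default_values(schema) would return:
#
#     {'Person': {'age': 18}}
#
# unlike get_size_constraints(), attributes without a default are left out.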

class PGHelper(object):
    def __init__(self, cnx, pg_schema='public'):
        self.cnx = cnx
        # Deals with pg schema, see #3216686
        self.pg_schema = pg_schema

    def application_indexes_constraints(self, tablename):
        """ Get all the indexes/constraints for a given tablename """
        indexes = self.application_indexes(tablename)
        constraints = self.application_constraints(tablename)
        _indexes = {}
        for name, query in indexes.items():
            # Remove pkey indexes (automatically created by constraints)
            # Specific cases of primary key, see #3224079
            if name not in constraints:
                _indexes[name] = query
        return _indexes, constraints

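    # Illustrative usage (table name is hypothetical):
    #
    #     helper = PGHelper(cnx)
    #     indexes, constraints = helper.application_indexes_constraints('cw_person')
    #
    # ``indexes`` maps each non-primary-key index name to the CREATE INDEX
    # statement that recreates it (see index_query() below), and
    # ``constraints`` maps each constraint name to an ALTER TABLE ... ADD
    # CONSTRAINT statement (see application_constraints() below), so both can
    # be dropped before a massive import and replayed afterwards.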
    def table_exists(self, table_name):
        sql = "SELECT * from information_schema.tables WHERE table_name=%(t)s AND table_schema=%(s)s"
        crs = self.cnx.system_sql(sql, {'t': table_name, 's': self.pg_schema})
        res = crs.fetchall()
        if res:
            return True
        return False

    # def check_if_primary_key_exists_for_table(self, table_name):
    #     sql = ("SELECT constraint_name FROM information_schema.table_constraints "
    #            "WHERE constraint_type = 'PRIMARY KEY' AND table_name=%(t)s AND table_schema=%(s)s")
    #     crs = self.cnx.system_sql(sql, {'t': table_name, 's': self.pg_schema})
    #     res = crs.fetchall()
    #     if res:
    #         return True
    #     return False

    def index_query(self, name):
        """Return the SQL query needed to recreate the index"""
        return self.cnx.system_sql("SELECT pg_get_indexdef(c.oid) "
                                   "from pg_catalog.pg_class c "
                                   "LEFT JOIN pg_catalog.pg_namespace n "
                                   "ON n.oid = c.relnamespace "
                                   "WHERE c.relname = %(r)s AND n.nspname=%(n)s",
                                   {'r': name, 'n': self.pg_schema}).fetchone()[0]

    def constraint_query(self, name):
        """Return the SQL query needed to recreate the constraint"""
        return self.cnx.system_sql("SELECT pg_get_constraintdef(c.oid) "
                                   "from pg_catalog.pg_constraint c "
                                   "LEFT JOIN pg_catalog.pg_namespace n "
                                   "ON n.oid = c.connamespace "
                                   "WHERE c.conname = %(r)s AND n.nspname=%(n)s",
                                   {'r': name, 'n': self.pg_schema}).fetchone()[0]

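    # For instance, for a primary key on cw_person.cw_eid (hypothetical
    # names), PostgreSQL typically reports:
    #
    #     index_query('cw_person_pkey')
    #     -> 'CREATE UNIQUE INDEX cw_person_pkey ON cw_person USING btree (cw_eid)'
    #     constraint_query('cw_person_pkey')
    #     -> 'PRIMARY KEY (cw_eid)'
    #
    # which is why constraints are replayed through ALTER TABLE ... ADD
    # CONSTRAINT in application_constraints() below.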
    def application_indexes(self, tablename):
        """ Iterate over all the indexes """
        # This SQL query (cf http://www.postgresql.org/message-id/432F450F.4080700@squiz.net)
        # aims at getting all the indexes for each table.
        sql = '''SELECT c.relname as "Name"
        FROM pg_catalog.pg_class c
        JOIN pg_catalog.pg_index i ON i.indexrelid = c.oid
        JOIN pg_catalog.pg_class c2 ON i.indrelid = c2.oid
        LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner
        LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
        WHERE c.relkind IN ('i','')
              AND c2.relname = '%s'
              AND i.indisprimary = FALSE
              AND n.nspname NOT IN ('pg_catalog', 'pg_toast')
              AND pg_catalog.pg_table_is_visible(c.oid);''' % tablename
        indexes_list = self.cnx.system_sql(sql).fetchall()
        indexes = {}
        for name, in indexes_list:
            indexes[name] = self.index_query(name)
        return indexes

    def application_constraints(self, tablename):
        """ Iterate over all the constraints """
        sql = '''SELECT i.conname as "Name"
                 FROM pg_catalog.pg_class c
                 JOIN pg_catalog.pg_constraint i ON i.conrelid = c.oid
                 JOIN pg_catalog.pg_class c2 ON i.conrelid=c2.oid
                 LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner
                 LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                 WHERE
                    c2.relname = '%s'
                    AND n.nspname NOT IN ('pg_catalog', 'pg_toast')
                    AND pg_catalog.pg_table_is_visible(c.oid)
                 ''' % tablename
        indexes_list = self.cnx.system_sql(sql).fetchall()
        constraints = {}
        for name, in indexes_list:
            query = self.constraint_query(name)
            constraints[name] = 'ALTER TABLE %s ADD CONSTRAINT %s %s' % (tablename, name, query)
        return constraints