author | Julien Cristau <julien.cristau@logilab.fr> |
Mon, 09 Nov 2015 15:22:43 +0100 | |
changeset 10869 | 575982c948a9 |
parent 10867 | ca73ee6d24ad |
child 10870 | 9dedf464596b |
permissions | -rw-r--r-- |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
1 |
# coding: utf-8 |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
2 |
# copyright 2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
3 |
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
4 |
# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
5 |
# This file is part of CubicWeb. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
6 |
# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
7 |
# CubicWeb is free software: you can redistribute it and/or modify it under the |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
8 |
# terms of the GNU Lesser General Public License as published by the Free |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
9 |
# Software Foundation, either version 2.1 of the License, or (at your option) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
10 |
# any later version. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
11 |
# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
12 |
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT ANY |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
13 |
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
14 |
# A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
15 |
# details. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
16 |
# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
17 |
# You should have received a copy of the GNU Lesser General Public License along |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
18 |
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
19 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
20 |
import logging |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
21 |
from datetime import datetime |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
22 |
from collections import defaultdict |
10854
f437787d8849
[dataimport] import StringIO from io
Julien Cristau <julien.cristau@logilab.fr>
parents:
10853
diff
changeset
|
23 |
from io import StringIO |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
24 |
|
10859
375a8232e61c
[dataimport] import range from six.moves
Julien Cristau <julien.cristau@logilab.fr>
parents:
10856
diff
changeset
|
25 |
from six.moves import range |
375a8232e61c
[dataimport] import range from six.moves
Julien Cristau <julien.cristau@logilab.fr>
parents:
10856
diff
changeset
|
26 |
|
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
27 |
from yams.constraints import SizeConstraint |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
28 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
29 |
from psycopg2 import ProgrammingError |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
30 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
31 |
from cubicweb.dataimport import stores, pgstore |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
32 |
from cubicweb.utils import make_uid |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
33 |
from cubicweb.server.sqlutils import SQL_PREFIX |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
34 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
35 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
36 |
class MassiveObjectStore(stores.RQLObjectStore): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
37 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
38 |
Store for massive import of data, with delayed insertion of meta data. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
39 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
40 |
WARNINGS: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
41 |
- This store may be only used with PostgreSQL for now, as it relies |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
42 |
on the COPY FROM method, and on specific PostgreSQL tables to get all |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
43 |
the indexes. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
44 |
- This store can only insert relations that are not inlined (i.e., |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
45 |
which do *not* have inlined=True in their definition in the schema). |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
46 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
47 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
48 |
It should be used as follows: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
49 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
50 |
store = MassiveObjectStore(cnx) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
51 |
store.init_rtype_table('Person', 'lives_in', 'Location') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
52 |
... |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
53 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
54 |
store.create_entity('Person', subj_iid_attribute=person_iid, ...) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
55 |
store.create_entity('Location', obj_iid_attribute=location_iid, ...) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
56 |
... |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
57 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
58 |
# subj_iid_attribute and obj_iid_attribute are argument names |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
59 |
# chosen by the user (e.g. "cwuri"). These names can be identical. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
60 |
# person_iid and location_iid are unique IDs and depend on the data |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
61 |
# (e.g URI). |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
62 |
store.flush() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
63 |
store.relate_by_iid(person_iid, 'lives_in', location_iid) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
64 |
# For example: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
65 |
store.create_entity('Person', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
66 |
cwuri='http://dbpedia.org/toto', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
67 |
name='Toto') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
68 |
store.create_entity('Location', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
69 |
uri='http://geonames.org/11111', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
70 |
name='Somewhere') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
71 |
store.flush() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
72 |
store.relate_by_iid('http://dbpedia.org/toto', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
73 |
'lives_in', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
74 |
'http://geonames.org/11111') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
75 |
# Finally |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
76 |
store.flush_meta_data() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
77 |
store.convert_relations('Person', 'lives_in', 'Location', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
78 |
'subj_iid_attribute', 'obj_iid_attribute') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
79 |
# For the previous example: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
80 |
store.convert_relations('Person', 'lives_in', 'Location', 'cwuri', 'uri') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
81 |
... |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
82 |
store.cleanup() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
83 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
84 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
85 |
def __init__(self, cnx, autoflush_metadata=True, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
86 |
replace_sep='', commit_at_flush=True, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
87 |
pg_schema='public', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
88 |
iid_maxsize=1024, uri_param_name='rdf:about', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
89 |
eids_seq_range=10000, eids_seq_start=None, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
90 |
on_commit_callback=None, on_rollback_callback=None, |
10865
2537df9fdd27
[dataimport] drop no more used parameter on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10864
diff
changeset
|
91 |
slave_mode=False, |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
92 |
source=None): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
93 |
""" Create a MassiveObject store, with the following attributes: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
94 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
95 |
- cnx: CubicWeb cnx |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
96 |
- autoflush_metadata: Boolean. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
97 |
Automatically flush the metadata after |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
98 |
each flush() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
99 |
- replace_sep: String. Replace separator used for |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
100 |
(COPY FROM) buffer creation. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
101 |
- commit_at_flush: Boolean. Commit after each flush(). |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
102 |
- eids_seq_range: Int. Range of the eids_seq_range to be fetched each time |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
103 |
by the store (default is 10000). |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
104 |
If None, the sequence eids is attached to each entity tables |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
105 |
(backward compatibility with the 0.2.0). |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
106 |
- eids_seq_start: Int. Set the eids sequence value (if None, nothing is done). |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
107 |
- iid_maxsize: Int. Max size of the iid, used to create the |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
108 |
iid_eid convertion table. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
109 |
- uri_param_name: String. If given, will use this parameter to get cw_uri |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
110 |
for entities. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
111 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
112 |
super(MassiveObjectStore, self).__init__(cnx) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
113 |
self.logger = logging.getLogger('dataio.relationmixin') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
114 |
self._cnx = cnx |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
115 |
self.sql = cnx.system_sql |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
116 |
self.iid_maxsize = iid_maxsize |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
117 |
self.replace_sep = replace_sep |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
118 |
self.commit_at_flush = commit_at_flush |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
119 |
self._data_uri_relations = defaultdict(list) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
120 |
self._initialized = {'init_uri_eid': set(), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
121 |
'uri_eid_inserted': set(), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
122 |
'uri_rtypes': set(), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
123 |
'entities': set(), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
124 |
'rtypes': set(), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
125 |
} |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
126 |
self.sql = self._cnx.system_sql |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
127 |
self.logger = logging.getLogger('dataio.massiveimport') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
128 |
self.autoflush_metadata = autoflush_metadata |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
129 |
self.replace_sep = replace_sep |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
130 |
self.slave_mode = slave_mode |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
131 |
self.size_constraints = get_size_constraints(cnx.vreg.schema) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
132 |
self.default_values = get_default_values(cnx.vreg.schema) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
133 |
self._dbh = PGHelper(self._cnx, pg_schema or 'public') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
134 |
self._data_entities = defaultdict(list) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
135 |
self._data_relations = defaultdict(list) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
136 |
self._now = datetime.now() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
137 |
self._default_cwuri = make_uid('_auto_generated') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
138 |
self.uri_param_name = uri_param_name |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
139 |
self._count_cwuri = 0 |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
140 |
self.commit_at_flush = commit_at_flush |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
141 |
self.on_commit_callback = on_commit_callback |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
142 |
self.on_rollback_callback = on_rollback_callback |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
143 |
# Initialized the meta tables of dataio for warm restart |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
144 |
self._init_dataio_metatables() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
145 |
# Internal markers of initialization |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
146 |
self._eids_seq_range = eids_seq_range |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
147 |
self._eids_seq_start = eids_seq_start |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
148 |
if self._eids_seq_start is not None: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
149 |
self._cnx.system_sql(self._cnx.repo.system_source.dbhelper.sql_restart_numrange( |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
150 |
'entities_id_seq', initial_value=self._eids_seq_start + 1)) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
151 |
cnx.commit() |
10855
cd91f46fa633
[dataimport] use next builtin instead of next method on iterators
Julien Cristau <julien.cristau@logilab.fr>
parents:
10854
diff
changeset
|
152 |
self.get_next_eid = lambda g=self._get_eid_gen(): next(g) |
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
153 |
# recreate then when self.finish() is called |
10869
575982c948a9
[dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
10867
diff
changeset
|
154 |
if not self.slave_mode: |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
155 |
self._drop_metatables_constraints() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
156 |
if source is None: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
157 |
source = cnx.repo.system_source |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
158 |
self.source = source |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
159 |
self._etype_eid_idx = dict(cnx.execute('Any XN,X WHERE X is CWEType, X name XN')) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
160 |
cnx.read_security = False |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
161 |
cnx.write_security = False |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
162 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
163 |
### INIT FUNCTIONS ######################################################## |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
164 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
165 |
def init_rtype_table(self, etype_from, rtype, etype_to): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
166 |
""" Build temporary table a for standard rtype """ |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
167 |
# Create an uri_eid table for each etype for a better |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
168 |
# control of which etype is concerns for a particular |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
169 |
# possibly multivalued relation. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
170 |
for etype in (etype_from, etype_to): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
171 |
if etype and etype not in self._initialized['init_uri_eid']: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
172 |
self._init_uri_eid_table(etype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
173 |
if rtype not in self._initialized['uri_rtypes']: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
174 |
# Create the temporary tables |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
175 |
if not self._cnx.repo.schema.rschema(rtype).inlined: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
176 |
try: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
177 |
sql = 'CREATE TABLE %(r)s_relation_iid_tmp (uri_from character ' \ |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
178 |
'varying(%(s)s), uri_to character varying(%(s)s))' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
179 |
self.sql(sql % {'r': rtype, 's': self.iid_maxsize}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
180 |
except ProgrammingError: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
181 |
# XXX Already exist (probably due to multiple import) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
182 |
pass |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
183 |
else: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
184 |
self.logger.warning("inlined relation %s: cannot insert it", rtype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
185 |
#Add it to the initialized set |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
186 |
self._initialized['uri_rtypes'].add(rtype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
187 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
188 |
def _init_uri_eid_table(self, etype): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
189 |
""" Build a temporary table for id/eid convertion |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
190 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
191 |
try: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
192 |
sql = "CREATE TABLE uri_eid_%(e)s (uri character varying(%(size)s), eid integer)" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
193 |
self.sql(sql % {'e': etype.lower(), 'size': self.iid_maxsize,}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
194 |
except ProgrammingError: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
195 |
# XXX Already exist (probably due to multiple import) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
196 |
pass |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
197 |
# Add it to the initialized set |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
198 |
self._initialized['init_uri_eid'].add(etype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
199 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
200 |
def _init_dataio_metatables(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
201 |
""" Initialized the meta tables of dataio for warm restart |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
202 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
203 |
# Check if dataio tables are not already created (i.e. a restart) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
204 |
self._initialized_table_created = self._dbh.table_exists('dataio_initialized') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
205 |
self._constraint_table_created = self._dbh.table_exists('dataio_constraints') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
206 |
self._metadata_table_created = self._dbh.table_exists('dataio_metadata') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
207 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
208 |
### RELATE FUNCTION ####################################################### |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
209 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
210 |
def relate_by_iid(self, iid_from, rtype, iid_to): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
211 |
"""Add new relation based on the internal id (iid) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
212 |
of the entities (not the eid)""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
213 |
# Push data |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
214 |
if isinstance(iid_from, unicode): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
215 |
iid_from = iid_from.encode('utf-8') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
216 |
if isinstance(iid_to, unicode): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
217 |
iid_to = iid_to.encode('utf-8') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
218 |
self._data_uri_relations[rtype].append({'uri_from': iid_from, 'uri_to': iid_to}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
219 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
220 |
### FLUSH FUNCTIONS ####################################################### |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
221 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
222 |
def flush_relations(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
223 |
""" Flush the relations data |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
224 |
""" |
10856
b839167d99a4
[dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents:
10855
diff
changeset
|
225 |
for rtype, data in self._data_uri_relations.items(): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
226 |
if not data: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
227 |
self.logger.info('No data for rtype %s', rtype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
228 |
buf = StringIO('\n'.join(['%(uri_from)s\t%(uri_to)s' % d for d in data])) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
229 |
if not buf: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
230 |
self.logger.info('Empty Buffer for rtype %s', rtype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
231 |
continue |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
232 |
cursor = self._cnx.cnxset.cu |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
233 |
if not self._cnx.repo.schema.rschema(rtype).inlined: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
234 |
cursor.copy_from(buf, '%s_relation_iid_tmp' % rtype.lower(), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
235 |
null='NULL', columns=('uri_from', 'uri_to')) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
236 |
else: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
237 |
self.logger.warning("inlined relation %s: cannot insert it", rtype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
238 |
buf.close() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
239 |
# Clear data cache |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
240 |
self._data_uri_relations[rtype] = [] |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
241 |
# Commit if asked |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
242 |
if self.commit_at_flush: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
243 |
self.commit() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
244 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
245 |
def fill_uri_eid_table(self, etype, uri_label): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
246 |
""" Fill the uri_eid table |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
247 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
248 |
self.logger.info('Fill uri_eid for etype %s', etype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
249 |
sql = 'INSERT INTO uri_eid_%(e)s SELECT cw_%(l)s, cw_eid FROM cw_%(e)s' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
250 |
self.sql(sql % {'l': uri_label, 'e': etype.lower()}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
251 |
# Add indexes |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
252 |
self.sql('CREATE INDEX uri_eid_%(e)s_idx ON uri_eid_%(e)s' '(uri)' % {'e': etype.lower()}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
253 |
# Set the etype as converted |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
254 |
self._initialized['uri_eid_inserted'].add(etype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
255 |
self.commit() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
256 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
257 |
def convert_relations(self, etype_from, rtype, etype_to, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
258 |
uri_label_from='cwuri', uri_label_to='cwuri'): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
259 |
""" Flush the converted relations |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
260 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
261 |
# Always flush relations to be sure |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
262 |
self.logger.info('Convert relations %s %s %s', etype_from, rtype, etype_to) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
263 |
self.flush_relations() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
264 |
if uri_label_from and etype_from not in self._initialized['uri_eid_inserted']: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
265 |
self.fill_uri_eid_table(etype_from, uri_label_from) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
266 |
if uri_label_to and etype_to not in self._initialized['uri_eid_inserted']: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
267 |
self.fill_uri_eid_table(etype_to, uri_label_to) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
268 |
if self._cnx.repo.schema.rschema(rtype).inlined: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
269 |
self.logger.warning("Can't insert inlined relation %s", rtype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
270 |
return |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
271 |
if uri_label_from and uri_label_to: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
272 |
sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT O1.eid, O2.eid |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
273 |
FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(ef)s as O1, uri_eid_%(et)s as O2 |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
274 |
WHERE O1.uri=T.uri_from AND O2.uri=T.uri_to AND NOT EXISTS ( |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
275 |
SELECT 1 FROM %(r)s_relation AS TT WHERE TT.eid_from=O1.eid AND TT.eid_to=O2.eid); |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
276 |
''' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
277 |
elif uri_label_to: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
278 |
sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
279 |
CAST(T.uri_from AS INTEGER), O1.eid |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
280 |
FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(et)s as O1 |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
281 |
WHERE O1.uri=T.uri_to AND NOT EXISTS ( |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
282 |
SELECT 1 FROM %(r)s_relation AS TT WHERE |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
283 |
TT.eid_from=CAST(T.uri_from AS INTEGER) AND TT.eid_to=O1.eid); |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
284 |
''' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
285 |
elif uri_label_from: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
286 |
sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT O1.eid, T.uri_to |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
287 |
O1.eid, CAST(T.uri_to AS INTEGER) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
288 |
FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(ef)s as O1 |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
289 |
WHERE O1.uri=T.uri_from AND NOT EXISTS ( |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
290 |
SELECT 1 FROM %(r)s_relation AS TT WHERE |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
291 |
TT.eid_from=O1.eid AND TT.eid_to=CAST(T.uri_to AS INTEGER)); |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
292 |
''' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
293 |
try: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
294 |
self.sql(sql % {'r': rtype.lower(), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
295 |
'et': etype_to.lower() if etype_to else u'', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
296 |
'ef': etype_from.lower() if etype_from else u''}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
297 |
except Exception as ex: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
298 |
self.logger.error("Can't insert relation %s: %s", rtype, ex) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
299 |
self.commit() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
300 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
301 |
### SQL UTILITIES ######################################################### |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
302 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
303 |
def drop_and_store_indexes_constraints(self, tablename): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
304 |
# Drop indexes and constraints |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
305 |
if not self._constraint_table_created: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
306 |
# Create a table to save the constraints |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
307 |
# Allow reload even after crash |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
308 |
sql = "CREATE TABLE dataio_constraints (origtable text, query text, type varchar(256))" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
309 |
self.sql(sql) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
310 |
self._constraint_table_created = True |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
311 |
self._drop_table_constraints_indexes(tablename) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
312 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
313 |
def _drop_table_constraints_indexes(self, tablename): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
314 |
""" Drop and store table constraints and indexes """ |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
315 |
indexes, constraints = self._dbh.application_indexes_constraints(tablename) |
10856
b839167d99a4
[dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents:
10855
diff
changeset
|
316 |
for name, query in constraints.items(): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
317 |
sql = 'INSERT INTO dataio_constraints VALUES (%(e)s, %(c)s, %(t)s)' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
318 |
self.sql(sql, {'e': tablename, 'c': query, 't': 'constraint'}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
319 |
sql = 'ALTER TABLE %s DROP CONSTRAINT %s CASCADE' % (tablename, name) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
320 |
self.sql(sql) |
10856
b839167d99a4
[dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents:
10855
diff
changeset
|
321 |
for name, query in indexes.items(): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
322 |
sql = 'INSERT INTO dataio_constraints VALUES (%(e)s, %(c)s, %(t)s)' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
323 |
self.sql(sql, {'e': tablename, 'c': query, 't': 'index'}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
324 |
sql = 'DROP INDEX %s' % name |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
325 |
self.sql(sql) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
326 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
327 |
def reapply_constraint_index(self, tablename): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
328 |
if not self._dbh.table_exists('dataio_constraints'): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
329 |
self.logger.info('The table dataio_constraints does not exist ' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
330 |
'(keep_index option should be True)') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
331 |
return |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
332 |
sql = 'SELECT query FROM dataio_constraints WHERE origtable = %(e)s' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
333 |
crs = self.sql(sql, {'e': tablename}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
334 |
for query, in crs.fetchall(): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
335 |
self.sql(query) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
336 |
self.sql('DELETE FROM dataio_constraints WHERE origtable = %(e)s ' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
337 |
'AND query = %(q)s', {'e': tablename, 'q': query}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
338 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
339 |
def _drop_metatables_constraints(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
340 |
""" Drop all the constraints for the meta data""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
341 |
for tablename in ('created_by_relation', 'owned_by_relation', |
10861
6205b89c3af6
[massive store] identity_relation should not be considered, is_relation seems missing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10859
diff
changeset
|
342 |
'is_instance_of_relation', 'is_relation', |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
343 |
'entities'): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
344 |
self.drop_and_store_indexes_constraints(tablename) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
345 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
346 |
def _create_metatables_constraints(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
347 |
""" Create all the constraints for the meta data""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
348 |
for tablename in ('entities', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
349 |
'created_by_relation', 'owned_by_relation', |
10861
6205b89c3af6
[massive store] identity_relation should not be considered, is_relation seems missing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10859
diff
changeset
|
350 |
'is_instance_of_relation', 'is_relation'): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
351 |
# Indexes and constraints |
10869
575982c948a9
[dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
10867
diff
changeset
|
352 |
self.reapply_constraint_index(tablename) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
353 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
354 |
def init_relation_table(self, rtype): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
355 |
""" Get and remove all indexes for performance sake """ |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
356 |
# Create temporary table |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
357 |
if not self.slave_mode and rtype not in self._initialized['rtypes']: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
358 |
sql = "CREATE TABLE %s_relation_tmp (eid_from integer, eid_to integer)" % rtype.lower() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
359 |
self.sql(sql) |
10869
575982c948a9
[dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
10867
diff
changeset
|
360 |
# Drop indexes and constraints |
575982c948a9
[dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
10867
diff
changeset
|
361 |
tablename = '%s_relation' % rtype.lower() |
575982c948a9
[dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
10867
diff
changeset
|
362 |
self.drop_and_store_indexes_constraints(tablename) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
363 |
# Push the etype in the initialized table for easier restart |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
364 |
self.init_create_initialized_table() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
365 |
sql = 'INSERT INTO dataio_initialized VALUES (%(e)s, %(t)s)' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
366 |
self.sql(sql, {'e': rtype, 't': 'rtype'}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
367 |
# Mark rtype as "initialized" for faster check |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
368 |
self._initialized['rtypes'].add(rtype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
369 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
370 |
def init_create_initialized_table(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
371 |
""" Create the dataio initialized table |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
372 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
373 |
if not self._initialized_table_created: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
374 |
sql = "CREATE TABLE dataio_initialized (retype text, type varchar(128))" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
375 |
self.sql(sql) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
376 |
self._initialized_table_created = True |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
377 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
378 |
def init_etype_table(self, etype): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
379 |
""" Add eid sequence to a particular etype table and |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
380 |
remove all indexes for performance sake """ |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
381 |
if etype not in self._initialized['entities']: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
382 |
# Only for non-initialized etype and not slave mode store |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
383 |
if not self.slave_mode: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
384 |
if self._eids_seq_range is None: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
385 |
# Eids are directly set by the entities_id_seq. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
386 |
# We attach this sequence to all the created etypes. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
387 |
sql = ("ALTER TABLE cw_%s ALTER COLUMN cw_eid " |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
388 |
"SET DEFAULT nextval('entities_id_seq')" % etype.lower()) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
389 |
self.sql(sql) |
10869
575982c948a9
[dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
10867
diff
changeset
|
390 |
# Drop indexes and constraints |
575982c948a9
[dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
10867
diff
changeset
|
391 |
tablename = 'cw_%s' % etype.lower() |
575982c948a9
[dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
10867
diff
changeset
|
392 |
self.drop_and_store_indexes_constraints(tablename) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
393 |
# Push the etype in the initialized table for easier restart |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
394 |
self.init_create_initialized_table() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
395 |
sql = 'INSERT INTO dataio_initialized VALUES (%(e)s, %(t)s)' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
396 |
self.sql(sql, {'e': etype, 't': 'etype'}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
397 |
# Mark etype as "initialized" for faster check |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
398 |
self._initialized['entities'].add(etype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
399 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
400 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
401 |
### ENTITIES CREATION ##################################################### |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
402 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
403 |
def _get_eid_gen(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
404 |
""" Function getting the next eid. This is done by preselecting |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
405 |
a given number of eids from the 'entities_id_seq', and then |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
406 |
storing them""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
407 |
while True: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
408 |
last_eid = self._cnx.repo.system_source.create_eid(self._cnx, self._eids_seq_range) |
10859
375a8232e61c
[dataimport] import range from six.moves
Julien Cristau <julien.cristau@logilab.fr>
parents:
10856
diff
changeset
|
409 |
for eid in range(last_eid - self._eids_seq_range + 1, last_eid + 1): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
410 |
yield eid |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
411 |
|
10864
b7f4acf0473b
[dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10863
diff
changeset
|
412 |
def _apply_size_constraints(self, etype, kwargs): |
b7f4acf0473b
[dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10863
diff
changeset
|
413 |
"""Apply the size constraints for a given etype, attribute and value.""" |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
414 |
size_constraints = self.size_constraints[etype] |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
415 |
for attr, value in kwargs.items(): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
416 |
if value: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
417 |
maxsize = size_constraints.get(attr) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
418 |
if maxsize is not None and len(value) > maxsize: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
419 |
kwargs[attr] = value[:maxsize-4] + '...' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
420 |
|
10864
b7f4acf0473b
[dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10863
diff
changeset
|
421 |
def _apply_default_values(self, etype, kwargs): |
b7f4acf0473b
[dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10863
diff
changeset
|
422 |
"""Apply the default values for a given etype, attribute and value.""" |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
423 |
default_values = self.default_values[etype] |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
424 |
missing_keys = set(default_values) - set(kwargs) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
425 |
kwargs.update((key, default_values[key]) for key in missing_keys) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
426 |
|
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
427 |
# store api ################################################################ |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
428 |
|
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
429 |
def prepare_insert_entity(self, etype, **kwargs): |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
430 |
"""Given an entity type, attributes and inlined relations, returns the inserted entity's |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
431 |
eid. |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
432 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
433 |
# Init the table if necessary |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
434 |
self.init_etype_table(etype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
435 |
# Add meta data if not given |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
436 |
if 'modification_date' not in kwargs: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
437 |
kwargs['modification_date'] = self._now |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
438 |
if 'creation_date' not in kwargs: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
439 |
kwargs['creation_date'] = self._now |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
440 |
if 'cwuri' not in kwargs: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
441 |
if self.uri_param_name and self.uri_param_name in kwargs: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
442 |
kwargs['cwuri'] = kwargs[self.uri_param_name] |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
443 |
else: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
444 |
kwargs['cwuri'] = self._default_cwuri + str(self._count_cwuri) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
445 |
self._count_cwuri += 1 |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
446 |
if 'eid' not in kwargs and self._eids_seq_range is not None: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
447 |
# If eid is not given and the eids sequence is set, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
448 |
# use the value from the sequence |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
449 |
kwargs['eid'] = self.get_next_eid() |
10864
b7f4acf0473b
[dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10863
diff
changeset
|
450 |
self._apply_size_constraints(etype, kwargs) |
b7f4acf0473b
[dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10863
diff
changeset
|
451 |
self._apply_default_values(etype, kwargs) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
452 |
self._data_entities[etype].append(kwargs) |
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
453 |
return kwargs.get('eid') |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
454 |
|
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
455 |
def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs): |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
456 |
"""Insert into the database a relation ``rtype`` between entities with eids ``eid_from`` |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
457 |
and ``eid_to``. |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
458 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
459 |
# Init the table if necessary |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
460 |
self.init_relation_table(rtype) |
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
461 |
self._data_relations[rtype].append({'eid_from': eid_from, 'eid_to': eid_to}) |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
462 |
|
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
463 |
def flush(self): |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
464 |
"""Flush the data""" |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
465 |
self.flush_entities() |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
466 |
self.flush_internal_relations() |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
467 |
self.flush_relations() |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
468 |
|
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
469 |
def commit(self): |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
470 |
"""Commit the database transaction.""" |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
471 |
self.on_commit() |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
472 |
super(MassiveObjectStore, self).commit() |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
473 |
|
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
474 |
def finish(self): |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
475 |
"""Remove temporary tables and columns.""" |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
476 |
self.logger.info("Start cleaning") |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
477 |
if self.slave_mode: |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
478 |
raise RuntimeError('Store cleanup is not allowed in slave mode') |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
479 |
self.logger.info("Start cleaning") |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
480 |
# Cleanup relations tables |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
481 |
for etype in self._initialized['init_uri_eid']: |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
482 |
self.sql('DROP TABLE uri_eid_%s' % etype.lower()) |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
483 |
# Remove relations tables |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
484 |
for rtype in self._initialized['uri_rtypes']: |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
485 |
if not self._cnx.repo.schema.rschema(rtype).inlined: |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
486 |
self.sql('DROP TABLE %(r)s_relation_iid_tmp' % {'r': rtype}) |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
487 |
else: |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
488 |
self.logger.warning("inlined relation %s: no cleanup to be done for it" % rtype) |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
489 |
self.commit() |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
490 |
# Get all the initialized etypes/rtypes |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
491 |
if self._dbh.table_exists('dataio_initialized'): |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
492 |
crs = self.sql('SELECT retype, type FROM dataio_initialized') |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
493 |
for retype, _type in crs.fetchall(): |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
494 |
self.logger.info('Cleanup for %s' % retype) |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
495 |
if _type == 'etype': |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
496 |
# Cleanup entities tables - Recreate indexes |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
497 |
self._cleanup_entities(retype) |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
498 |
elif _type == 'rtype': |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
499 |
# Cleanup relations tables |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
500 |
self._cleanup_relations(retype) |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
501 |
self.sql('DELETE FROM dataio_initialized WHERE retype = %(e)s', |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
502 |
{'e': retype}) |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
503 |
# Create meta constraints (entities, is_instance_of, ...) |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
504 |
self._create_metatables_constraints() |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
505 |
self.commit() |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
506 |
# Delete the meta data table |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
507 |
for table_name in ('dataio_initialized', 'dataio_constraints', 'dataio_metadata'): |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
508 |
if self._dbh.table_exists(table_name): |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
509 |
self.sql('DROP TABLE %s' % table_name) |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
510 |
self.commit() |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
511 |
|
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
512 |
### FLUSH ################################################################# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
513 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
514 |
def on_commit(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
515 |
if self.on_commit_callback: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
516 |
self.on_commit_callback() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
517 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
518 |
def on_rollback(self, exc, etype, data): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
519 |
if self.on_rollback_callback: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
520 |
self.on_rollback_callback(exc, etype, data) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
521 |
self._cnx.rollback() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
522 |
else: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
523 |
raise exc |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
524 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
525 |
def flush_internal_relations(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
526 |
""" Flush the relations data |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
527 |
""" |
10856
b839167d99a4
[dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents:
10855
diff
changeset
|
528 |
for rtype, data in self._data_relations.items(): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
529 |
if not data: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
530 |
# There is no data for these etype for this flush round. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
531 |
continue |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
532 |
buf = pgstore._create_copyfrom_buffer(data, ('eid_from', 'eid_to'), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
533 |
replace_sep=self.replace_sep) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
534 |
if not buf: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
535 |
# The buffer is empty. This is probably due to error in _create_copyfrom_buffer |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
536 |
raise ValueError |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
537 |
cursor = self._cnx.cnxset.cu |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
538 |
# Push into the tmp table |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
539 |
cursor.copy_from(buf, '%s_relation_tmp' % rtype.lower(), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
540 |
null='NULL', columns=('eid_from', 'eid_to')) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
541 |
# Clear data cache |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
542 |
self._data_relations[rtype] = [] |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
543 |
# Commit if asked |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
544 |
if self.commit_at_flush: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
545 |
self.commit() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
546 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
547 |
def flush_entities(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
548 |
""" Flush the entities data |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
549 |
""" |
10856
b839167d99a4
[dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents:
10855
diff
changeset
|
550 |
for etype, data in self._data_entities.items(): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
551 |
if not data: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
552 |
# There is no data for these etype for this flush round. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
553 |
continue |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
554 |
# XXX It may be interresting to directly infer the columns' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
555 |
# names from the schema instead of using .keys() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
556 |
columns = data[0].keys() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
557 |
# XXX For now, the _create_copyfrom_buffer does a "row[column]" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
558 |
# which can lead to a key error. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
559 |
# Thus we should create dictionary with all the keys. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
560 |
columns = set() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
561 |
for d in data: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
562 |
columns.update(d.keys()) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
563 |
_data = [] |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
564 |
_base_data = dict.fromkeys(columns) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
565 |
for d in data: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
566 |
_d = _base_data.copy() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
567 |
_d.update(d) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
568 |
_data.append(_d) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
569 |
buf = pgstore._create_copyfrom_buffer(_data, columns, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
570 |
replace_sep=self.replace_sep) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
571 |
if not buf: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
572 |
# The buffer is empty. This is probably due to error in _create_copyfrom_buffer |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
573 |
raise ValueError('Error in buffer creation for etype %s' % etype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
574 |
columns = ['cw_%s' % attr for attr in columns] |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
575 |
cursor = self._cnx.cnxset.cu |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
576 |
try: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
577 |
cursor.copy_from(buf, 'cw_%s' % etype.lower(), null='NULL', columns=columns) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
578 |
except Exception as exc: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
579 |
self.on_rollback(exc, etype, data) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
580 |
# Clear data cache |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
581 |
self._data_entities[etype] = [] |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
582 |
if self.autoflush_metadata: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
583 |
self.flush_meta_data() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
584 |
# Commit if asked |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
585 |
if self.commit_at_flush: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
586 |
self.commit() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
587 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
588 |
def flush_meta_data(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
589 |
""" Flush the meta data (entities table, is_instance table, ...) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
590 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
591 |
if self.slave_mode: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
592 |
raise RuntimeError('Flushing meta data is not allow in slave mode') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
593 |
if not self._dbh.table_exists('dataio_initialized'): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
594 |
self.logger.info('No information available for initialized etypes/rtypes') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
595 |
return |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
596 |
if not self._metadata_table_created: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
597 |
# Keep the correctly flush meta data in database |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
598 |
sql = "CREATE TABLE dataio_metadata (etype text)" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
599 |
self.sql(sql) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
600 |
self._metadata_table_created = True |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
601 |
crs = self.sql('SELECT etype FROM dataio_metadata') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
602 |
already_flushed = set(e for e, in crs.fetchall()) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
603 |
crs = self.sql('SELECT retype FROM dataio_initialized WHERE type = %(t)s', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
604 |
{'t': 'etype'}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
605 |
all_etypes = set(e for e, in crs.fetchall()) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
606 |
for etype in all_etypes: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
607 |
if etype not in already_flushed: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
608 |
# Deals with meta data |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
609 |
self.logger.info('Flushing meta data for %s' % etype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
610 |
self.insert_massive_meta_data(etype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
611 |
sql = 'INSERT INTO dataio_metadata VALUES (%(e)s)' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
612 |
self.sql(sql, {'e': etype}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
613 |
# Final commit |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
614 |
self.commit() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
615 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
616 |
def _cleanup_entities(self, etype): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
617 |
""" Cleanup etype table """ |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
618 |
if self._eids_seq_range is None: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
619 |
# Remove DEFAULT eids sequence if added |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
620 |
sql = 'ALTER TABLE cw_%s ALTER COLUMN cw_eid DROP DEFAULT;' % etype.lower() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
621 |
self.sql(sql) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
622 |
# Create indexes and constraints |
10869
575982c948a9
[dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
10867
diff
changeset
|
623 |
tablename = SQL_PREFIX + etype.lower() |
575982c948a9
[dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
10867
diff
changeset
|
624 |
self.reapply_constraint_index(tablename) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
625 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
626 |
def _cleanup_relations(self, rtype): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
627 |
""" Cleanup rtype table """ |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
628 |
# Push into relation table while removing duplicate |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
629 |
sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
630 |
T.eid_from, T.eid_to FROM %(r)s_relation_tmp AS T |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
631 |
WHERE NOT EXISTS (SELECT 1 FROM %(r)s_relation AS TT WHERE |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
632 |
TT.eid_from=T.eid_from AND TT.eid_to=T.eid_to);''' % {'r': rtype} |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
633 |
self.sql(sql) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
634 |
# Drop temporary relation table |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
635 |
sql = ('DROP TABLE %(r)s_relation_tmp' % {'r': rtype.lower()}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
636 |
self.sql(sql) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
637 |
# Create indexes and constraints |
10869
575982c948a9
[dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
10867
diff
changeset
|
638 |
tablename = '%s_relation' % rtype.lower() |
575982c948a9
[dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
10867
diff
changeset
|
639 |
self.reapply_constraint_index(tablename) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
640 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
641 |
def insert_massive_meta_data(self, etype): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
642 |
""" Massive insertion of meta data for a given etype, based on SQL statements. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
643 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
644 |
# Push data - Use coalesce to avoid NULL (and get 0), if there is no |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
645 |
# entities of this type in the entities table. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
646 |
# Meta data relations |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
647 |
self.metagen_push_relation(etype, self._cnx.user.eid, 'created_by_relation') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
648 |
self.metagen_push_relation(etype, self._cnx.user.eid, 'owned_by_relation') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
649 |
self.metagen_push_relation(etype, self.source.eid, 'cw_source_relation') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
650 |
self.metagen_push_relation(etype, self._etype_eid_idx[etype], 'is_relation') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
651 |
self.metagen_push_relation(etype, self._etype_eid_idx[etype], 'is_instance_of_relation') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
652 |
sql = ("INSERT INTO entities (eid, type, asource, extid) " |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
653 |
"SELECT cw_eid, '%s', 'system', NULL FROM cw_%s " |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
654 |
"WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
655 |
% (etype, etype.lower())) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
656 |
self.sql(sql) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
657 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
658 |
def metagen_push_relation(self, etype, eid_to, rtype): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
659 |
sql = ("INSERT INTO %s (eid_from, eid_to) SELECT cw_eid, %s FROM cw_%s " |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
660 |
"WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
661 |
% (rtype, eid_to, etype.lower())) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
662 |
self.sql(sql) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
663 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
664 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
665 |
### CONSTRAINTS MANAGEMENT FUNCTIONS ########################################## |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
666 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
667 |
def get_size_constraints(schema): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
668 |
"""analyzes yams ``schema`` and returns the list of size constraints. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
669 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
670 |
The returned value is a dictionary mapping entity types to a |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
671 |
sub-dictionnaries mapping attribute names -> max size. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
672 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
673 |
size_constraints = {} |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
674 |
# iterates on all entity types |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
675 |
for eschema in schema.entities(): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
676 |
# for each entity type, iterates on attribute definitions |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
677 |
size_constraints[eschema.type] = eschema_constraints = {} |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
678 |
for rschema, aschema in eschema.attribute_definitions(): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
679 |
# for each attribute, if a size constraint is found, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
680 |
# append it to the size constraint list |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
681 |
maxsize = None |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
682 |
rdef = rschema.rdef(eschema, aschema) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
683 |
for constraint in rdef.constraints: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
684 |
if isinstance(constraint, SizeConstraint): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
685 |
maxsize = constraint.max |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
686 |
eschema_constraints[rschema.type] = maxsize |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
687 |
return size_constraints |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
688 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
689 |
def get_default_values(schema): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
690 |
"""analyzes yams ``schema`` and returns the list of default values. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
691 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
692 |
The returned value is a dictionary mapping entity types to a |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
693 |
sub-dictionnaries mapping attribute names -> default values. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
694 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
695 |
default_values = {} |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
696 |
# iterates on all entity types |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
697 |
for eschema in schema.entities(): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
698 |
# for each entity type, iterates on attribute definitions |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
699 |
default_values[eschema.type] = eschema_constraints = {} |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
700 |
for rschema, _ in eschema.attribute_definitions(): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
701 |
# for each attribute, if a size constraint is found, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
702 |
# append it to the size constraint list |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
703 |
if eschema.default(rschema.type) is not None: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
704 |
eschema_constraints[rschema.type] = eschema.default(rschema.type) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
705 |
return default_values |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
706 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
707 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
708 |
class PGHelper(object): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
709 |
def __init__(self, cnx, pg_schema='public'): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
710 |
self.cnx = cnx |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
711 |
# Deals with pg schema, see #3216686 |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
712 |
self.pg_schema = pg_schema |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
713 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
714 |
def application_indexes_constraints(self, tablename): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
715 |
""" Get all the indexes/constraints for a given tablename """ |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
716 |
indexes = self.application_indexes(tablename) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
717 |
constraints = self.application_constraints(tablename) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
718 |
_indexes = {} |
10856
b839167d99a4
[dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents:
10855
diff
changeset
|
719 |
for name, query in indexes.items(): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
720 |
# Remove pkey indexes (automatically created by constraints) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
721 |
# Specific cases of primary key, see #3224079 |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
722 |
if name not in constraints: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
723 |
_indexes[name] = query |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
724 |
return _indexes, constraints |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
725 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
726 |
def table_exists(self, table_name): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
727 |
sql = "SELECT * from information_schema.tables WHERE table_name=%(t)s AND table_schema=%(s)s" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
728 |
crs = self.cnx.system_sql(sql, {'t': table_name, 's': self.pg_schema}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
729 |
res = crs.fetchall() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
730 |
if res: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
731 |
return True |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
732 |
return False |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
733 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
734 |
# def check_if_primary_key_exists_for_table(self, table_name): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
735 |
# sql = ("SELECT constraint_name FROM information_schema.table_constraints " |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
736 |
# "WHERE constraint_type = 'PRIMARY KEY' AND table_name=%(t)s AND table_schema=%(s)s") |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
737 |
# crs = self.cnx.system_sql(sql, {'t': table_name, 's': self.pg_schema}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
738 |
# res = crs.fetchall() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
739 |
# if res: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
740 |
# return True |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
741 |
# return False |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
742 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
743 |
def index_query(self, name): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
744 |
"""Get the request to be used to recreate the index""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
745 |
return self.cnx.system_sql("SELECT pg_get_indexdef(c.oid) " |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
746 |
"from pg_catalog.pg_class c " |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
747 |
"LEFT JOIN pg_catalog.pg_namespace n " |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
748 |
"ON n.oid = c.relnamespace " |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
749 |
"WHERE c.relname = %(r)s AND n.nspname=%(n)s", |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
750 |
{'r': name, 'n': self.pg_schema}).fetchone()[0] |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
751 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
752 |
def constraint_query(self, name): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
753 |
"""Get the request to be used to recreate the constraint""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
754 |
return self.cnx.system_sql("SELECT pg_get_constraintdef(c.oid) " |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
755 |
"from pg_catalog.pg_constraint c " |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
756 |
"LEFT JOIN pg_catalog.pg_namespace n " |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
757 |
"ON n.oid = c.connamespace " |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
758 |
"WHERE c.conname = %(r)s AND n.nspname=%(n)s", |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
759 |
{'r': name, 'n': self.pg_schema}).fetchone()[0] |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
760 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
761 |
def application_indexes(self, tablename): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
762 |
""" Iterate over all the indexes """ |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
763 |
# This SQL query (cf http://www.postgresql.org/message-id/432F450F.4080700@squiz.net) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
764 |
# aims at getting all the indexes for each table. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
765 |
sql = '''SELECT c.relname as "Name" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
766 |
FROM pg_catalog.pg_class c |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
767 |
JOIN pg_catalog.pg_index i ON i.indexrelid = c.oid |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
768 |
JOIN pg_catalog.pg_class c2 ON i.indrelid = c2.oid |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
769 |
LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
770 |
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
771 |
WHERE c.relkind IN ('i','') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
772 |
AND c2.relname = '%s' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
773 |
AND n.nspname NOT IN ('pg_catalog', 'pg_toast') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
774 |
AND pg_catalog.pg_table_is_visible(c.oid);''' % tablename |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
775 |
indexes_list = self.cnx.system_sql(sql).fetchall() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
776 |
indexes = {} |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
777 |
for name, in indexes_list: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
778 |
indexes[name] = self.index_query(name) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
779 |
return indexes |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
780 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
781 |
def application_constraints(self, tablename): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
782 |
""" Iterate over all the constraints """ |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
783 |
sql = '''SELECT i.conname as "Name" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
784 |
FROM pg_catalog.pg_class c JOIN pg_catalog.pg_constraint i |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
785 |
ON i.conrelid = c.oid JOIN pg_catalog.pg_class c2 ON i.conrelid=c2.oid |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
786 |
LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
787 |
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
788 |
WHERE c2.relname = '%s' AND n.nspname NOT IN ('pg_catalog', 'pg_toast') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
789 |
AND pg_catalog.pg_table_is_visible(c.oid);''' % tablename |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
790 |
indexes_list = self.cnx.system_sql(sql).fetchall() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
791 |
constraints = {} |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
792 |
for name, in indexes_list: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
793 |
query = self.constraint_query(name) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
794 |
constraints[name] = 'ALTER TABLE %s ADD CONSTRAINT %s %s' % (tablename, name, query) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
795 |
return constraints |