author | Julien Cristau <julien.cristau@logilab.fr> |
Wed, 21 Oct 2015 20:17:44 +0200 | |
changeset 10856 | b839167d99a4 |
parent 10855 | cd91f46fa633 |
child 10859 | 375a8232e61c |
permissions | -rw-r--r-- |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
1 |
# coding: utf-8 |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
2 |
# copyright 2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
3 |
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
4 |
# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
5 |
# This file is part of CubicWeb. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
6 |
# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
7 |
# CubicWeb is free software: you can redistribute it and/or modify it under the |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
8 |
# terms of the GNU Lesser General Public License as published by the Free |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
9 |
# Software Foundation, either version 2.1 of the License, or (at your option) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
10 |
# any later version. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
11 |
# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
12 |
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT ANY |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
13 |
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
14 |
# A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
15 |
# details. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
16 |
# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
17 |
# You should have received a copy of the GNU Lesser General Public License along |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
18 |
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
19 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
20 |
import logging |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
21 |
from datetime import datetime |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
22 |
from collections import defaultdict |
10854
f437787d8849
[dataimport] import StringIO from io
Julien Cristau <julien.cristau@logilab.fr>
parents:
10853
diff
changeset
|
23 |
from io import StringIO |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
24 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
25 |
from yams.constraints import SizeConstraint |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
26 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
27 |
from psycopg2 import ProgrammingError |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
28 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
29 |
from cubicweb.dataimport import stores, pgstore |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
30 |
from cubicweb.utils import make_uid |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
31 |
from cubicweb.server.sqlutils import SQL_PREFIX |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
32 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
33 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
34 |
class MassiveObjectStore(stores.RQLObjectStore): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
35 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
36 |
Store for massive import of data, with delayed insertion of meta data. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
37 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
38 |
WARNINGS: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
39 |
- This store may be only used with PostgreSQL for now, as it relies |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
40 |
on the COPY FROM method, and on specific PostgreSQL tables to get all |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
41 |
the indexes. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
42 |
- This store can only insert relations that are not inlined (i.e., |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
43 |
which do *not* have inlined=True in their definition in the schema). |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
44 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
45 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
46 |
It should be used as follows: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
47 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
48 |
store = MassiveObjectStore(cnx) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
49 |
store.init_rtype_table('Person', 'lives_in', 'Location') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
50 |
... |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
51 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
52 |
store.create_entity('Person', subj_iid_attribute=person_iid, ...) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
53 |
store.create_entity('Location', obj_iid_attribute=location_iid, ...) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
54 |
... |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
55 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
56 |
# subj_iid_attribute and obj_iid_attribute are argument names |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
57 |
# chosen by the user (e.g. "cwuri"). These names can be identical. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
58 |
# person_iid and location_iid are unique IDs and depend on the data |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
59 |
# (e.g URI). |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
60 |
store.flush() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
61 |
store.relate_by_iid(person_iid, 'lives_in', location_iid) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
62 |
# For example: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
63 |
store.create_entity('Person', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
64 |
cwuri='http://dbpedia.org/toto', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
65 |
name='Toto') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
66 |
store.create_entity('Location', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
67 |
uri='http://geonames.org/11111', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
68 |
name='Somewhere') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
69 |
store.flush() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
70 |
store.relate_by_iid('http://dbpedia.org/toto', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
71 |
'lives_in', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
72 |
'http://geonames.org/11111') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
73 |
# Finally |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
74 |
store.flush_meta_data() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
75 |
store.convert_relations('Person', 'lives_in', 'Location', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
76 |
'subj_iid_attribute', 'obj_iid_attribute') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
77 |
# For the previous example: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
78 |
store.convert_relations('Person', 'lives_in', 'Location', 'cwuri', 'uri') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
79 |
... |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
80 |
store.cleanup() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
81 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
82 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
83 |
def __init__(self, cnx, autoflush_metadata=True, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
84 |
replace_sep='', commit_at_flush=True, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
85 |
drop_index=True, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
86 |
pg_schema='public', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
87 |
iid_maxsize=1024, uri_param_name='rdf:about', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
88 |
eids_seq_range=10000, eids_seq_start=None, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
89 |
on_commit_callback=None, on_rollback_callback=None, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
90 |
slave_mode=False, build_entities=False, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
91 |
source=None): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
92 |
""" Create a MassiveObject store, with the following attributes: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
93 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
94 |
- cnx: CubicWeb cnx |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
95 |
- autoflush_metadata: Boolean. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
96 |
Automatically flush the metadata after |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
97 |
each flush() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
98 |
- replace_sep: String. Replace separator used for |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
99 |
(COPY FROM) buffer creation. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
100 |
- commit_at_flush: Boolean. Commit after each flush(). |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
101 |
- drop_index: Boolean. Drop SQL index before COPY FROM |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
102 |
- eids_seq_range: Int. Range of the eids_seq_range to be fetched each time |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
103 |
by the store (default is 10000). |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
104 |
If None, the sequence eids is attached to each entity tables |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
105 |
(backward compatibility with the 0.2.0). |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
106 |
- eids_seq_start: Int. Set the eids sequence value (if None, nothing is done). |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
107 |
- iid_maxsize: Int. Max size of the iid, used to create the |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
108 |
iid_eid convertion table. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
109 |
- uri_param_name: String. If given, will use this parameter to get cw_uri |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
110 |
for entities. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
111 |
- build_entities: Boolean. If True, create_entity returns a CW etype object |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
112 |
(but WITHOUT eid !). |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
113 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
114 |
super(MassiveObjectStore, self).__init__(cnx) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
115 |
self.logger = logging.getLogger('dataio.relationmixin') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
116 |
self._cnx = cnx |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
117 |
self.sql = cnx.system_sql |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
118 |
self.iid_maxsize = iid_maxsize |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
119 |
self.replace_sep = replace_sep |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
120 |
self.commit_at_flush = commit_at_flush |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
121 |
self._data_uri_relations = defaultdict(list) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
122 |
self._initialized = {'init_uri_eid': set(), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
123 |
'uri_eid_inserted': set(), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
124 |
'uri_rtypes': set(), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
125 |
'entities': set(), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
126 |
'rtypes': set(), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
127 |
} |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
128 |
self.sql = self._cnx.system_sql |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
129 |
self.logger = logging.getLogger('dataio.massiveimport') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
130 |
self.autoflush_metadata = autoflush_metadata |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
131 |
self.replace_sep = replace_sep |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
132 |
self.drop_index = drop_index |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
133 |
self.slave_mode = slave_mode |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
134 |
self.size_constraints = get_size_constraints(cnx.vreg.schema) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
135 |
self.default_values = get_default_values(cnx.vreg.schema) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
136 |
self._dbh = PGHelper(self._cnx, pg_schema or 'public') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
137 |
self._build_entities = build_entities |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
138 |
self._data_entities = defaultdict(list) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
139 |
self._data_relations = defaultdict(list) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
140 |
self._now = datetime.now() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
141 |
self._default_cwuri = make_uid('_auto_generated') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
142 |
self.uri_param_name = uri_param_name |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
143 |
self._count_cwuri = 0 |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
144 |
self.commit_at_flush = commit_at_flush |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
145 |
self.on_commit_callback = on_commit_callback |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
146 |
self.on_rollback_callback = on_rollback_callback |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
147 |
# Initialized the meta tables of dataio for warm restart |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
148 |
self._init_dataio_metatables() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
149 |
# Internal markers of initialization |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
150 |
self._eids_seq_range = eids_seq_range |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
151 |
self._eids_seq_start = eids_seq_start |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
152 |
if self._eids_seq_start is not None: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
153 |
self._cnx.system_sql(self._cnx.repo.system_source.dbhelper.sql_restart_numrange( |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
154 |
'entities_id_seq', initial_value=self._eids_seq_start + 1)) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
155 |
cnx.commit() |
10855
cd91f46fa633
[dataimport] use next builtin instead of next method on iterators
Julien Cristau <julien.cristau@logilab.fr>
parents:
10854
diff
changeset
|
156 |
self.get_next_eid = lambda g=self._get_eid_gen(): next(g) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
157 |
# recreate then when self.cleanup() is called |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
158 |
if not self.slave_mode and self.drop_index: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
159 |
self._drop_metatables_constraints() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
160 |
if source is None: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
161 |
source = cnx.repo.system_source |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
162 |
self.source = source |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
163 |
self._etype_eid_idx = dict(cnx.execute('Any XN,X WHERE X is CWEType, X name XN')) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
164 |
cnx.read_security = False |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
165 |
cnx.write_security = False |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
166 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
167 |
### INIT FUNCTIONS ######################################################## |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
168 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
169 |
def init_rtype_table(self, etype_from, rtype, etype_to):
    """Prepare the temporary iid table used by a standard (non-inlined) rtype.

    Also makes sure the uri/eid conversion table exists for both end etypes.
    """
    # One uri_eid table per concerned etype gives finer control over
    # which etype takes part in a possibly multivalued relation.
    for end_etype in (etype_from, etype_to):
        if end_etype and end_etype not in self._initialized['init_uri_eid']:
            self._init_uri_eid_table(end_etype)
    if rtype in self._initialized['uri_rtypes']:
        return
    rschema = self._cnx.repo.schema.rschema(rtype)
    if rschema.inlined:
        self.logger.warning("inlined relation %s: cannot insert it", rtype)
    else:
        create_query = ('CREATE TABLE %(r)s_relation_iid_tmp (uri_from character '
                        'varying(%(s)s), uri_to character varying(%(s)s))')
        try:
            self.sql(create_query % {'r': rtype, 's': self.iid_maxsize})
        except ProgrammingError:
            # XXX Already exist (probably due to multiple import)
            pass
    # remember the rtype has been handled
    self._initialized['uri_rtypes'].add(rtype)
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
191 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
192 |
def _init_uri_eid_table(self, etype): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
193 |
""" Build a temporary table for id/eid convertion |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
194 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
195 |
try: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
196 |
sql = "CREATE TABLE uri_eid_%(e)s (uri character varying(%(size)s), eid integer)" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
197 |
self.sql(sql % {'e': etype.lower(), 'size': self.iid_maxsize,}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
198 |
except ProgrammingError: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
199 |
# XXX Already exist (probably due to multiple import) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
200 |
pass |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
201 |
# Add it to the initialized set |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
202 |
self._initialized['init_uri_eid'].add(etype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
203 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
204 |
def _init_dataio_metatables(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
205 |
""" Initialized the meta tables of dataio for warm restart |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
206 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
207 |
# Check if dataio tables are not already created (i.e. a restart) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
208 |
self._initialized_table_created = self._dbh.table_exists('dataio_initialized') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
209 |
self._constraint_table_created = self._dbh.table_exists('dataio_constraints') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
210 |
self._metadata_table_created = self._dbh.table_exists('dataio_metadata') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
211 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
212 |
### RELATE FUNCTION ####################################################### |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
213 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
214 |
def relate_by_iid(self, iid_from, rtype, iid_to):
    """Push a new `rtype` relation identified by the internal ids (iids)
    of both entities, not their eids."""
    # utf-8 encode unicode iids so they can be written to the COPY
    # buffer later on (Python 2 `unicode` builtin).
    def _encoded(iid):
        return iid.encode('utf-8') if isinstance(iid, unicode) else iid
    row = {'uri_from': _encoded(iid_from), 'uri_to': _encoded(iid_to)}
    self._data_uri_relations[rtype].append(row)
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
223 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
224 |
### FLUSH FUNCTIONS ####################################################### |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
225 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
226 |
def flush_relations(self):
    """Flush the buffered iid relation rows into their temporary tables.

    Non-inlined rtypes are bulk-loaded through the DB-API ``copy_from``;
    inlined rtypes cannot be handled this way and are only reported.
    Empty rtypes are skipped entirely.
    """
    for rtype, data in self._data_uri_relations.items():
        if not data:
            # Fixed: the original logged here but did not skip, and its
            # later `if not buf:` guard tested the StringIO *object*
            # (always truthy), so empty rtypes still reached copy_from.
            self.logger.info('No data for rtype %s', rtype)
            continue
        buf = StringIO('\n'.join('%(uri_from)s\t%(uri_to)s' % d for d in data))
        cursor = self._cnx.cnxset.cu
        if not self._cnx.repo.schema.rschema(rtype).inlined:
            cursor.copy_from(buf, '%s_relation_iid_tmp' % rtype.lower(),
                             null='NULL', columns=('uri_from', 'uri_to'))
        else:
            self.logger.warning("inlined relation %s: cannot insert it", rtype)
        buf.close()
        # Clear data cache
        self._data_uri_relations[rtype] = []
        # Commit if asked
        if self.commit_at_flush:
            self.commit()
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
248 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
249 |
def fill_uri_eid_table(self, etype, uri_label):
    """Populate the uri/eid mapping table of `etype` from its entity table."""
    self.logger.info('Fill uri_eid for etype %s', etype)
    table = etype.lower()
    insert = 'INSERT INTO uri_eid_%(e)s SELECT cw_%(l)s, cw_eid FROM cw_%(e)s'
    self.sql(insert % {'l': uri_label, 'e': table})
    # index the uri column for the upcoming join on it
    self.sql('CREATE INDEX uri_eid_%(e)s_idx ON uri_eid_%(e)s(uri)' % {'e': table})
    # remember this etype has been converted
    self._initialized['uri_eid_inserted'].add(etype)
    self.commit()
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
260 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
261 |
def convert_relations(self, etype_from, rtype, etype_to,
                      uri_label_from='cwuri', uri_label_to='cwuri'):
    """Insert the relations buffered in the iid temporary table into the
    real relation table, converting iids to eids.

    `uri_label_from` / `uri_label_to` give the attribute used as iid for
    each end; a falsy label means the corresponding end of the temporary
    table already holds eids (castable as integers).
    """
    # Always flush relations to be sure
    self.logger.info('Convert relations %s %s %s', etype_from, rtype, etype_to)
    self.flush_relations()
    if uri_label_from and etype_from not in self._initialized['uri_eid_inserted']:
        self.fill_uri_eid_table(etype_from, uri_label_from)
    if uri_label_to and etype_to not in self._initialized['uri_eid_inserted']:
        self.fill_uri_eid_table(etype_to, uri_label_to)
    if self._cnx.repo.schema.rschema(rtype).inlined:
        self.logger.warning("Can't insert inlined relation %s", rtype)
        return
    if uri_label_from and uri_label_to:
        sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT O1.eid, O2.eid
        FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(ef)s as O1, uri_eid_%(et)s as O2
        WHERE O1.uri=T.uri_from AND O2.uri=T.uri_to AND NOT EXISTS (
        SELECT 1 FROM %(r)s_relation AS TT WHERE TT.eid_from=O1.eid AND TT.eid_to=O2.eid);
        '''
    elif uri_label_to:
        sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT
        CAST(T.uri_from AS INTEGER), O1.eid
        FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(et)s as O1
        WHERE O1.uri=T.uri_to AND NOT EXISTS (
        SELECT 1 FROM %(r)s_relation AS TT WHERE
        TT.eid_from=CAST(T.uri_from AS INTEGER) AND TT.eid_to=O1.eid);
        '''
    elif uri_label_from:
        # Fixed: the select list was duplicated
        # ("O1.eid, T.uri_to O1.eid, CAST(...)"), producing four columns
        # for a two-column insert, i.e. invalid SQL.
        sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT
        O1.eid, CAST(T.uri_to AS INTEGER)
        FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(ef)s as O1
        WHERE O1.uri=T.uri_from AND NOT EXISTS (
        SELECT 1 FROM %(r)s_relation AS TT WHERE
        TT.eid_from=O1.eid AND TT.eid_to=CAST(T.uri_to AS INTEGER));
        '''
    else:
        # Fixed: previously `sql` stayed unbound here and surfaced as an
        # obscure UnboundLocalError through the generic error handler.
        self.logger.error("Can't insert relation %s: no uri label given", rtype)
        self.commit()
        return
    try:
        self.sql(sql % {'r': rtype.lower(),
                        'et': etype_to.lower() if etype_to else u'',
                        'ef': etype_from.lower() if etype_from else u''})
    except Exception as ex:
        self.logger.error("Can't insert relation %s: %s", rtype, ex)
    self.commit()
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
304 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
305 |
### SQL UTILITIES ######################################################### |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
306 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
307 |
def drop_and_store_indexes_constraints(self, tablename):
    """Drop `tablename`'s indexes/constraints, saving them for later reapply."""
    if not self._constraint_table_created:
        # The saving table makes a reload possible even after a crash.
        self.sql("CREATE TABLE dataio_constraints "
                 "(origtable text, query text, type varchar(256))")
        self._constraint_table_created = True
    self._drop_table_constraints_indexes(tablename)
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
316 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
317 |
def _drop_table_constraints_indexes(self, tablename): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
318 |
""" Drop and store table constraints and indexes """ |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
319 |
indexes, constraints = self._dbh.application_indexes_constraints(tablename) |
10856
b839167d99a4
[dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents:
10855
diff
changeset
|
320 |
for name, query in constraints.items(): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
321 |
sql = 'INSERT INTO dataio_constraints VALUES (%(e)s, %(c)s, %(t)s)' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
322 |
self.sql(sql, {'e': tablename, 'c': query, 't': 'constraint'}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
323 |
sql = 'ALTER TABLE %s DROP CONSTRAINT %s CASCADE' % (tablename, name) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
324 |
self.sql(sql) |
10856
b839167d99a4
[dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents:
10855
diff
changeset
|
325 |
for name, query in indexes.items(): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
326 |
sql = 'INSERT INTO dataio_constraints VALUES (%(e)s, %(c)s, %(t)s)' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
327 |
self.sql(sql, {'e': tablename, 'c': query, 't': 'index'}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
328 |
sql = 'DROP INDEX %s' % name |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
329 |
self.sql(sql) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
330 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
331 |
def reapply_constraint_index(self, tablename):
    """Replay (and forget) the constraints/indexes saved for `tablename`."""
    if not self._dbh.table_exists('dataio_constraints'):
        self.logger.info('The table dataio_constraints does not exist '
                         '(keep_index option should be True)')
        return
    cursor = self.sql('SELECT query FROM dataio_constraints WHERE origtable = %(e)s',
                      {'e': tablename})
    for (creation_query,) in cursor.fetchall():
        self.sql(creation_query)
        # discard the saved entry once replayed
        self.sql('DELETE FROM dataio_constraints WHERE origtable = %(e)s '
                 'AND query = %(q)s', {'e': tablename, 'q': creation_query})
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
342 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
343 |
def _drop_metatables_constraints(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
344 |
""" Drop all the constraints for the meta data""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
345 |
for tablename in ('created_by_relation', 'owned_by_relation', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
346 |
'is_instance_of_relation', 'identity_relation', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
347 |
'entities'): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
348 |
self.drop_and_store_indexes_constraints(tablename) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
349 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
350 |
def _create_metatables_constraints(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
351 |
""" Create all the constraints for the meta data""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
352 |
for tablename in ('entities', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
353 |
'created_by_relation', 'owned_by_relation', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
354 |
'is_instance_of_relation', 'identity_relation'): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
355 |
# Indexes and constraints |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
356 |
if self.drop_index: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
357 |
self.reapply_constraint_index(tablename) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
358 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
359 |
def init_relation_table(self, rtype):
    """Prepare the import of relation *rtype*: create its temporary table,
    drop its indexes for performance sake and record it as initialized.

    Does nothing in slave mode or when *rtype* was already initialized.
    """
    if self.slave_mode or rtype in self._initialized['rtypes']:
        return
    # Temporary table receiving the copied rows
    self.sql("CREATE TABLE %s_relation_tmp (eid_from integer, eid_to integer)"
             % rtype.lower())
    if self.drop_index:
        # Drop indexes and constraints
        self.drop_and_store_indexes_constraints('%s_relation' % rtype.lower())
    # Push the rtype in the initialized table for easier restart
    self.init_create_initialized_table()
    self.sql('INSERT INTO dataio_initialized VALUES (%(e)s, %(t)s)',
             {'e': rtype, 't': 'rtype'})
    # Mark rtype as "initialized" for faster check
    self._initialized['rtypes'].add(rtype)
def init_create_initialized_table(self):
    """Create the ``dataio_initialized`` bookkeeping table, at most once
    per store instance."""
    if self._initialized_table_created:
        return
    self.sql("CREATE TABLE dataio_initialized (retype text, type varchar(128))")
    self._initialized_table_created = True
def init_etype_table(self, etype):
    """Prepare the import of entities of type *etype*: attach the eid
    sequence to its table, drop its indexes for performance sake and
    record it as initialized.

    Does nothing when *etype* was already initialized.
    """
    if etype in self._initialized['entities']:
        return
    # Table setup is only done by a non-slave-mode store
    if not self.slave_mode:
        if self._eids_seq_range is None:
            # Eids are directly set by the entities_id_seq.
            # We attach this sequence to all the created etypes.
            self.sql("ALTER TABLE cw_%s ALTER COLUMN cw_eid "
                     "SET DEFAULT nextval('entities_id_seq')" % etype.lower())
        if self.drop_index:
            # Drop indexes and constraints
            self.drop_and_store_indexes_constraints('cw_%s' % etype.lower())
        # Push the etype in the initialized table for easier restart
        self.init_create_initialized_table()
        self.sql('INSERT INTO dataio_initialized VALUES (%(e)s, %(t)s)',
                 {'e': etype, 't': 'etype'})
    # Mark etype as "initialized" for faster check
    self._initialized['entities'].add(etype)
### ENTITIES CREATION ##################################################### |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
409 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
410 |
def _get_eid_gen(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
411 |
""" Function getting the next eid. This is done by preselecting |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
412 |
a given number of eids from the 'entities_id_seq', and then |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
413 |
storing them""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
414 |
while True: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
415 |
last_eid = self._cnx.repo.system_source.create_eid(self._cnx, self._eids_seq_range) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
416 |
for eid in xrange(last_eid - self._eids_seq_range + 1, last_eid + 1): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
417 |
yield eid |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
418 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
419 |
def apply_size_constraints(self, etype, kwargs):
    """Truncate in place the attribute values of *kwargs* that exceed the
    size constraints declared for *etype*, and return *kwargs*."""
    constraints = self.size_constraints[etype]
    for attr, value in kwargs.items():
        if not value:
            continue
        maxsize = constraints.get(attr)
        if maxsize is not None and len(value) > maxsize:
            # Keep maxsize-4 characters and mark the cut with an ellipsis
            kwargs[attr] = value[:maxsize - 4] + '...'
    return kwargs
def apply_default_values(self, etype, kwargs):
    """Fill *kwargs* in place with the default values declared for *etype*
    for every attribute not already given, and return *kwargs*."""
    defaults = self.default_values[etype]
    for key in set(defaults) - set(kwargs):
        kwargs[key] = defaults[key]
    return kwargs
def create_entity(self, etype, **kwargs):
    """Create an entity of type *etype* from *kwargs* and return it.

    Missing metadata (modification/creation date, cwuri, eid) is filled
    in, size constraints and default values are applied, and the data is
    queued for the next flush.
    """
    # Make sure the etype table is ready to receive data
    self.init_etype_table(etype)
    # Fill in missing metadata
    if 'modification_date' not in kwargs:
        kwargs['modification_date'] = self._now
    if 'creation_date' not in kwargs:
        kwargs['creation_date'] = self._now
    if 'cwuri' not in kwargs:
        uri_param = self.uri_param_name
        if uri_param and uri_param in kwargs:
            kwargs['cwuri'] = kwargs[uri_param]
        else:
            kwargs['cwuri'] = self._default_cwuri + str(self._count_cwuri)
            self._count_cwuri += 1
    if 'eid' not in kwargs and self._eids_seq_range is not None:
        # The eids sequence is set and no eid was given: take the next
        # preallocated value
        kwargs['eid'] = self.get_next_eid()
    # Apply schema constraints and defaults
    kwargs = self.apply_size_constraints(etype, kwargs)
    kwargs = self.apply_default_values(etype, kwargs)
    # Queue the data, then build the entity object to return
    self._data_entities[etype].append(kwargs)
    entity = self._cnx.vreg['etypes'].etype_class(etype)(self._cnx)
    entity.cw_attr_cache.update(kwargs)
    if 'eid' in kwargs:
        entity.eid = kwargs['eid']
    return entity
### RELATIONS CREATION #################################################### |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
471 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
472 |
def relate(self, subj_eid, rtype, obj_eid, *args, **kwargs):
    """Queue a relation *rtype* between *subj_eid* and *obj_eid*.

    Extra positional/keyword arguments are accepted (and ignored) for
    compatibility with the other stores' API.
    """
    # Make sure the relation table is ready to receive data
    self.init_relation_table(rtype)
    self._data_relations[rtype].append({'eid_from': subj_eid,
                                        'eid_to': obj_eid})
### FLUSH ################################################################# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
481 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
482 |
def on_commit(self):
    """Run the user-provided commit callback, if any."""
    callback = self.on_commit_callback
    if callback:
        callback()
def on_rollback(self, exc, etype, data):
    """Handle a failed flush of *data* for *etype*.

    When a rollback callback is configured, invoke it with the failure
    details and roll the connection back; otherwise re-raise *exc*.
    """
    callback = self.on_rollback_callback
    if not callback:
        raise exc
    callback(exc, etype, data)
    self._cnx.rollback()
def commit(self):
    """Run the on-commit hook, then perform the actual commit through
    the parent store."""
    self.on_commit()
    super(MassiveObjectStore, self).commit()
def flush(self):
    """Flush all pending data: entities first, then internal relations,
    then regular relations."""
    for step in (self.flush_entities,
                 self.flush_internal_relations,
                 self.flush_relations):
        step()
def flush_internal_relations(self):
    """Flush the buffered relation rows into their temporary tables.

    Each non-empty rtype buffer is serialized and pushed with a
    COPY FROM into the ``<rtype>_relation_tmp`` table, then cleared.

    Raises ValueError when a buffer could not be built from the data.
    """
    for rtype, data in self._data_relations.items():
        if not data:
            # There is no data for this rtype for this flush round.
            continue
        buf = pgstore._create_copyfrom_buffer(data, ('eid_from', 'eid_to'),
                                              replace_sep=self.replace_sep)
        if not buf:
            # The buffer is empty. This is probably due to an error in
            # _create_copyfrom_buffer; name the rtype so the failure is
            # diagnosable (was a bare `raise ValueError`).
            raise ValueError('could not build a COPY buffer for relation %r'
                             % rtype)
        cursor = self._cnx.cnxset.cu
        # Push into the tmp table
        cursor.copy_from(buf, '%s_relation_tmp' % rtype.lower(),
                         null='NULL', columns=('eid_from', 'eid_to'))
        # Clear data cache
        self._data_relations[rtype] = []
    # Commit if asked
    if self.commit_at_flush:
        self.commit()
def flush_entities(self):
    """Flush the buffered entities data to the database.

    For each entity type with pending rows, build a ``copy_from`` buffer
    covering the union of all attribute names seen in the rows (rows may
    have heterogeneous keys, so missing attributes are completed with
    ``None``) and push it into the ``cw_<etype>`` table.  On copy failure,
    ``on_rollback()`` is called with the failing etype and its data, and
    processing continues with the remaining etypes.

    Raises:
        ValueError: if the copy buffer could not be created for an etype.
    """
    for etype, data in self._data_entities.items():
        if not data:
            # There is no data for this etype for this flush round.
            continue
        # The rows may not all share the same keys, and
        # _create_copyfrom_buffer does a "row[column]" lookup which would
        # raise KeyError on a missing attribute.  Collect the union of all
        # keys and complete each row with None defaults.
        # XXX It may be interesting to directly infer the columns'
        # names from the schema instead of from the data.
        columns = set()
        for d in data:
            columns.update(d.keys())
        _base_data = dict.fromkeys(columns)
        _data = []
        for d in data:
            _d = _base_data.copy()
            _d.update(d)
            _data.append(_d)
        buf = pgstore._create_copyfrom_buffer(_data, columns,
                                              replace_sep=self.replace_sep)
        if not buf:
            # The buffer is empty. This is probably due to error in _create_copyfrom_buffer
            raise ValueError('Error in buffer creation for etype %s' % etype)
        columns = ['cw_%s' % attr for attr in columns]
        cursor = self._cnx.cnxset.cu
        try:
            # Push into the entity table
            cursor.copy_from(buf, 'cw_%s' % etype.lower(), null='NULL',
                             columns=columns)
        except Exception as exc:
            # Delegate error handling; remaining etypes are still flushed.
            self.on_rollback(exc, etype, data)
        # Clear data cache
        self._data_entities[etype] = []
    if self.autoflush_metadata:
        self.flush_meta_data()
    # Commit if asked
    if self.commit_at_flush:
        self.commit()
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
566 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
567 |
def flush_meta_data(self):
    """ Flush the meta data (entities table, is_instance table, ...)

    For each initialized etype not yet flushed, insert its meta data and
    record it in the ``dataio_metadata`` bookkeeping table so a later
    flush round skips it.  Ends with a commit.

    Raises:
        RuntimeError: if called while the store is in slave mode.
    """
    if self.slave_mode:
        # fixed message: was "is not allow in slave mode"
        raise RuntimeError('Flushing meta data is not allowed in slave mode')
    if not self._dbh.table_exists('dataio_initialized'):
        self.logger.info('No information available for initialized etypes/rtypes')
        return
    if not self._metadata_table_created:
        # Keep the correctly flush meta data in database
        sql = "CREATE TABLE dataio_metadata (etype text)"
        self.sql(sql)
        self._metadata_table_created = True
    crs = self.sql('SELECT etype FROM dataio_metadata')
    already_flushed = set(e for e, in crs.fetchall())
    crs = self.sql('SELECT retype FROM dataio_initialized WHERE type = %(t)s',
                   {'t': 'etype'})
    all_etypes = set(e for e, in crs.fetchall())
    for etype in all_etypes:
        if etype not in already_flushed:
            # Deals with meta data
            self.logger.info('Flushing meta data for %s' % etype)
            self.insert_massive_meta_data(etype)
            sql = 'INSERT INTO dataio_metadata VALUES (%(e)s)'
            self.sql(sql, {'e': etype})
    # Final commit
    self.commit()
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
594 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
595 |
def _cleanup_entities(self, etype): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
596 |
""" Cleanup etype table """ |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
597 |
if self._eids_seq_range is None: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
598 |
# Remove DEFAULT eids sequence if added |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
599 |
sql = 'ALTER TABLE cw_%s ALTER COLUMN cw_eid DROP DEFAULT;' % etype.lower() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
600 |
self.sql(sql) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
601 |
# Create indexes and constraints |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
602 |
if self.drop_index: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
603 |
tablename = SQL_PREFIX + etype.lower() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
604 |
self.reapply_constraint_index(tablename) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
605 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
606 |
def _cleanup_relations(self, rtype): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
607 |
""" Cleanup rtype table """ |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
608 |
# Push into relation table while removing duplicate |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
609 |
sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
610 |
T.eid_from, T.eid_to FROM %(r)s_relation_tmp AS T |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
611 |
WHERE NOT EXISTS (SELECT 1 FROM %(r)s_relation AS TT WHERE |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
612 |
TT.eid_from=T.eid_from AND TT.eid_to=T.eid_to);''' % {'r': rtype} |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
613 |
self.sql(sql) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
614 |
# Drop temporary relation table |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
615 |
sql = ('DROP TABLE %(r)s_relation_tmp' % {'r': rtype.lower()}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
616 |
self.sql(sql) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
617 |
# Create indexes and constraints |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
618 |
if self.drop_index: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
619 |
tablename = '%s_relation' % rtype.lower() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
620 |
self.reapply_constraint_index(tablename) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
621 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
622 |
def cleanup(self):
    """ Remove temporary tables and columns

    Drops the uri_eid/iid helper tables, cleans up every initialized
    etype/rtype (recreating indexes and constraints), recreates the
    meta-table constraints, and finally drops the dataio bookkeeping
    tables.  Commits after each phase.

    Raises:
        RuntimeError: if called while the store is in slave mode.
    """
    if self.slave_mode:
        raise RuntimeError('Store cleanup is not allowed in slave mode')
    # was logged twice (before and after the slave-mode check); once is enough
    self.logger.info("Start cleaning")
    # Remove uri_eid lookup tables
    for etype in self._initialized['init_uri_eid']:
        self.sql('DROP TABLE uri_eid_%s' % etype.lower())
    # Remove temporary iid relation tables (inlined relations have none)
    for rtype in self._initialized['uri_rtypes']:
        if not self._cnx.repo.schema.rschema(rtype).inlined:
            self.sql('DROP TABLE %(r)s_relation_iid_tmp' % {'r': rtype})
        else:
            self.logger.warning("inlined relation %s: no cleanup to be done for it" % rtype)
    self.commit()
    # Get all the initialized etypes/rtypes
    if self._dbh.table_exists('dataio_initialized'):
        crs = self.sql('SELECT retype, type FROM dataio_initialized')
        for retype, _type in crs.fetchall():
            self.logger.info('Cleanup for %s' % retype)
            if _type == 'etype':
                # Cleanup entities tables - Recreate indexes
                self._cleanup_entities(retype)
            elif _type == 'rtype':
                # Cleanup relations tables
                self._cleanup_relations(retype)
            self.sql('DELETE FROM dataio_initialized WHERE retype = %(e)s',
                     {'e': retype})
    # Create meta constraints (entities, is_instance_of, ...)
    self._create_metatables_constraints()
    self.commit()
    # Delete the meta data tables
    for table_name in ('dataio_initialized', 'dataio_constraints', 'dataio_metadata'):
        if self._dbh.table_exists(table_name):
            self.sql('DROP TABLE %s' % table_name)
    self.commit()
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
660 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
661 |
def insert_massive_meta_data(self, etype):
    """ Massive insertion of meta data for a given etype, based on SQL statements.
    """
    # Meta data relations: created_by / owned_by point to the session
    # user, cw_source to the source, is / is_instance_of to the etype eid.
    user_eid = self._cnx.user.eid
    etype_eid = self._etype_eid_idx[etype]
    meta_relations = (
        ('created_by_relation', user_eid),
        ('owned_by_relation', user_eid),
        ('cw_source_relation', self.source.eid),
        ('is_relation', etype_eid),
        ('is_instance_of_relation', etype_eid),
    )
    for meta_rtype, target_eid in meta_relations:
        self.metagen_push_relation(etype, target_eid, meta_rtype)
    # Register the imported eids in the entities table, skipping any
    # already present there.
    self.sql("INSERT INTO entities (eid, type, asource, extid) "
             "SELECT cw_eid, '%s', 'system', NULL FROM cw_%s "
             "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
             % (etype, etype.lower()))
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
677 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
678 |
def metagen_push_relation(self, etype, eid_to, rtype):
    """Insert one meta relation row per newly imported entity of *etype*.

    Adds a (cw_eid, eid_to) pair into the *rtype* table for every row of
    ``cw_<etype>`` whose eid is not yet registered in the entities table.
    """
    query = ("INSERT INTO %s (eid_from, eid_to) SELECT cw_eid, %s FROM cw_%s "
             "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
             % (rtype, eid_to, etype.lower()))
    self.sql(query)
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
683 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
684 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
685 |
### CONSTRAINTS MANAGEMENT FUNCTIONS ########################################## |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
686 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
687 |
def get_size_constraints(schema):
    """Collect maximum-size constraints from a yams ``schema``.

    Returns a dictionary mapping each entity type name to a sub-dictionary
    that maps attribute names to their maximum size (attributes carrying a
    SizeConstraint; the last matching constraint wins).
    """
    constraints_by_etype = {}
    # walk every entity type of the schema
    for eschema in schema.entities():
        attr_constraints = {}
        constraints_by_etype[eschema.type] = attr_constraints
        # then every attribute definition of that entity type
        for rschema, aschema in eschema.attribute_definitions():
            rdef = rschema.rdef(eschema, aschema)
            for constraint in rdef.constraints:
                if isinstance(constraint, SizeConstraint):
                    attr_constraints[rschema.type] = constraint.max
    return constraints_by_etype
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
708 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
709 |
def get_default_values(schema):
    """Analyze yams ``schema`` and return the attribute default values.

    The returned value is a dictionary mapping entity types to
    sub-dictionaries mapping attribute names -> default values.
    Attributes without a default value are omitted.
    """
    default_values = {}
    # iterate on all entity types
    for eschema in schema.entities():
        # for each entity type, iterate on attribute definitions and
        # record the default value of each attribute that defines one
        default_values[eschema.type] = eschema_defaults = {}
        for rschema, _ in eschema.attribute_definitions():
            # hoist the lookup: don't call eschema.default() twice
            default = eschema.default(rschema.type)
            if default is not None:
                eschema_defaults[rschema.type] = default
    return default_values
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
726 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
727 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
728 |
class PGHelper(object):
    """Helpers to introspect a PostgreSQL database (tables, indexes and
    constraints) through a cubicweb connection's ``system_sql``.
    """

    def __init__(self, cnx, pg_schema='public'):
        # cnx: cubicweb connection exposing ``system_sql(sql, params)``
        self.cnx = cnx
        # Deals with pg schema, see #3216686
        self.pg_schema = pg_schema

    def application_indexes_constraints(self, tablename):
        """Return a 2-uple (indexes, constraints) for `tablename`.

        `indexes` excludes those automatically created alongside a
        constraint (e.g. primary key indexes, see #3224079), so that
        recreating constraints doesn't recreate those indexes twice.
        """
        indexes = self.application_indexes(tablename)
        constraints = self.application_constraints(tablename)
        # Remove pkey indexes (automatically created by constraints)
        indexes = {name: query for name, query in indexes.items()
                   if name not in constraints}
        return indexes, constraints

    def table_exists(self, table_name):
        """Return True if `table_name` exists in the considered pg schema."""
        sql = ("SELECT 1 FROM information_schema.tables "
               "WHERE table_name=%(t)s AND table_schema=%(s)s")
        crs = self.cnx.system_sql(sql, {'t': table_name, 's': self.pg_schema})
        return bool(crs.fetchall())

    def index_query(self, name):
        """Return the query to be used to recreate the index `name`."""
        return self.cnx.system_sql("SELECT pg_get_indexdef(c.oid) "
                                   "from pg_catalog.pg_class c "
                                   "LEFT JOIN pg_catalog.pg_namespace n "
                                   "ON n.oid = c.relnamespace "
                                   "WHERE c.relname = %(r)s AND n.nspname=%(n)s",
                                   {'r': name, 'n': self.pg_schema}).fetchone()[0]

    def constraint_query(self, name):
        """Return the query to be used to recreate the constraint `name`."""
        return self.cnx.system_sql("SELECT pg_get_constraintdef(c.oid) "
                                   "from pg_catalog.pg_constraint c "
                                   "LEFT JOIN pg_catalog.pg_namespace n "
                                   "ON n.oid = c.connamespace "
                                   "WHERE c.conname = %(r)s AND n.nspname=%(n)s",
                                   {'r': name, 'n': self.pg_schema}).fetchone()[0]

    def application_indexes(self, tablename):
        """Return a dict index name -> recreation query for all the
        indexes of `tablename`."""
        # This SQL query (cf http://www.postgresql.org/message-id/432F450F.4080700@squiz.net)
        # aims at getting all the indexes for each table.
        # NOTE: `tablename` is passed as a bound parameter instead of
        # being interpolated into the SQL string, to avoid SQL injection.
        sql = '''SELECT c.relname as "Name"
  FROM pg_catalog.pg_class c
       JOIN pg_catalog.pg_index i ON i.indexrelid = c.oid
       JOIN pg_catalog.pg_class c2 ON i.indrelid = c2.oid
       LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner
       LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
  WHERE c.relkind IN ('i','')
        AND c2.relname = %(t)s
        AND n.nspname NOT IN ('pg_catalog', 'pg_toast')
        AND pg_catalog.pg_table_is_visible(c.oid);'''
        indexes_list = self.cnx.system_sql(sql, {'t': tablename}).fetchall()
        return {name: self.index_query(name) for name, in indexes_list}

    def application_constraints(self, tablename):
        """Return a dict constraint name -> recreation query for all the
        constraints of `tablename`."""
        # NOTE: `tablename` is passed as a bound parameter instead of
        # being interpolated into the SQL string, to avoid SQL injection.
        sql = '''SELECT i.conname as "Name"
  FROM pg_catalog.pg_class c JOIN pg_catalog.pg_constraint i
       ON i.conrelid = c.oid JOIN pg_catalog.pg_class c2 ON i.conrelid=c2.oid
       LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner
       LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
  WHERE c2.relname = %(t)s AND n.nspname NOT IN ('pg_catalog', 'pg_toast')
        AND pg_catalog.pg_table_is_visible(c.oid);'''
        constraints_list = self.cnx.system_sql(sql, {'t': tablename}).fetchall()
        constraints = {}
        for name, in constraints_list:
            query = self.constraint_query(name)
            constraints[name] = 'ALTER TABLE %s ADD CONSTRAINT %s %s' % (
                tablename, name, query)
        return constraints