author | Sylvain Thénault <sylvain.thenault@logilab.fr> |
Thu, 28 Jan 2016 15:25:32 +0100 | |
changeset 11320 | 78da04c853dc |
parent 11319 | fe90d07f3afa |
child 11321 | fab543f542ac |
permissions | -rw-r--r-- |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
1 |
# coding: utf-8 |
11305
118d83e65ca8
[dataimport] remove useless assignment in massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11057
diff
changeset
|
2 |
# copyright 2015-2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
3 |
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
4 |
# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
5 |
# This file is part of CubicWeb. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
6 |
# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
7 |
# CubicWeb is free software: you can redistribute it and/or modify it under the |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
8 |
# terms of the GNU Lesser General Public License as published by the Free |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
9 |
# Software Foundation, either version 2.1 of the License, or (at your option) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
10 |
# any later version. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
11 |
# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
12 |
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT ANY |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
13 |
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
14 |
# A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
15 |
# details. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
16 |
# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
17 |
# You should have received a copy of the GNU Lesser General Public License along |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
18 |
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
19 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
20 |
import logging |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
21 |
from datetime import datetime |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
22 |
from collections import defaultdict |
10854
f437787d8849
[dataimport] import StringIO from io
Julien Cristau <julien.cristau@logilab.fr>
parents:
10853
diff
changeset
|
23 |
from io import StringIO |
11316
36c7cd362fc7
[dataimport] add a .schema shortcut attribute on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11315
diff
changeset
|
24 |
from itertools import chain |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
25 |
|
10859
375a8232e61c
[dataimport] import range from six.moves
Julien Cristau <julien.cristau@logilab.fr>
parents:
10856
diff
changeset
|
26 |
from six.moves import range |
375a8232e61c
[dataimport] import range from six.moves
Julien Cristau <julien.cristau@logilab.fr>
parents:
10856
diff
changeset
|
27 |
|
11308
df75fe529ba8
[dataimport] Prefer now(pytz.utc) to utcnow
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11305
diff
changeset
|
28 |
import pytz |
df75fe529ba8
[dataimport] Prefer now(pytz.utc) to utcnow
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11305
diff
changeset
|
29 |
|
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
30 |
from yams.constraints import SizeConstraint |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
31 |
|
11313
682b15eb2dd2
[dataimport] flake8
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11312
diff
changeset
|
32 |
from cubicweb.schema import PURE_VIRTUAL_RTYPES |
11020
c8c8f6a6147f
[dataimport] massive_store: drop all constraints in __init__ and restore all in finish
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
11019
diff
changeset
|
33 |
from cubicweb.server.schema2sql import rschema_has_table |
11313
682b15eb2dd2
[dataimport] flake8
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11312
diff
changeset
|
34 |
from cubicweb.server.sqlutils import SQL_PREFIX |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
35 |
from cubicweb.dataimport import stores, pgstore |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
36 |
from cubicweb.utils import make_uid |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
37 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
38 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
39 |
class MassiveObjectStore(stores.RQLObjectStore):
    """
    Store for massive import of data, with delayed insertion of meta data.

    WARNINGS:

    - This store may only be used with PostgreSQL for now, as it relies
      on the COPY FROM method, and on specific PostgreSQL tables to get all
      the indexes.

    - This store can only insert relations that are not inlined (i.e.,
      which do *not* have inlined=True in their definition in the schema), unless they are
      specified as entity attributes.

    It should be used as follows:

       store = MassiveObjectStore(cnx)
       store.init_rtype_table('Person', 'lives_in', 'Location')
       ...

       store.prepare_insert_entity('Person', subj_iid_attribute=person_iid, ...)
       store.prepare_insert_entity('Location', obj_iid_attribute=location_iid, ...)
       ...

       # subj_iid_attribute and obj_iid_attribute are argument names
       # chosen by the user (e.g. "cwuri"). These names can be identical.
       # person_iid and location_iid are unique IDs and depend on the data
       # (e.g URI).
       store.flush()
       store.relate_by_iid(person_iid, 'lives_in', location_iid)
       # For example:
       store.prepare_insert_entity('Person',
                                   cwuri='http://dbpedia.org/toto',
                                   name='Toto')
       store.prepare_insert_entity('Location',
                                   uri='http://geonames.org/11111',
                                   name='Somewhere')
       store.flush()
       store.relate_by_iid('http://dbpedia.org/toto',
                           'lives_in',
                           'http://geonames.org/11111')
       # Finally
       store.convert_relations('Person', 'lives_in', 'Location',
                               'subj_iid_attribute', 'obj_iid_attribute')
       # For the previous example:
       store.convert_relations('Person', 'lives_in', 'Location', 'cwuri', 'uri')
       ...
       store.commit()
       store.finish()
    """
    # max size of the iid, used to create the iid_eid conversion table
    iid_maxsize = 1024
10875
75d1b2d66f18
[dataimport] remove autoflush_metadata from MassiveObjectStore parameters
Julien Cristau <julien.cristau@logilab.fr>
parents:
10874
diff
changeset
|
92 |
def __init__(self, cnx,
             on_commit_callback=None, on_rollback_callback=None,
             slave_mode=False,
             source=None,
             eids_seq_range=10000):
    """Create a MassiveObject store, with the following attributes:

    - cnx: CubicWeb cnx
    - eids_seq_range: size of eid range reserved by the store for each batch
    """
    super(MassiveObjectStore, self).__init__(cnx)
    # commit/rollback hooks and batching configuration
    self.on_commit_callback = on_commit_callback
    self.on_rollback_callback = on_rollback_callback
    self.slave_mode = slave_mode
    self.eids_seq_range = eids_seq_range

    # shortcuts to connection-level services
    self.logger = logging.getLogger('dataimport.massive_store')
    self.sql = cnx.system_sql
    self.schema = self._cnx.vreg.schema
    self.default_values = get_default_values(self.schema)
    # bind the generator once so each call yields the next reserved eid
    self.get_next_eid = lambda g=self._get_eid_gen(): next(g)
    self._dbh = PGHelper(cnx)

    # massive import bypasses security checks entirely
    cnx.read_security = False
    cnx.write_security = False

    # pending rows, flushed in bulk later
    self._data_entities = defaultdict(list)
    self._data_relations = defaultdict(list)
    self._initialized = set()
    # uri handling
    self._data_uri_relations = defaultdict(list)
    # etypes for which we have a uri_eid_%(etype)s table
    self._init_uri_eid = set()
    # etypes for which we have a uri_eid_%(e)s_idx index
    self._uri_eid_inserted = set()
    # set of rtypes for which we have a %(rtype)s_relation_iid_tmp table
    self._uri_rtypes = set()

    self._now = datetime.now(pytz.utc)
    self._default_cwuri = make_uid('_auto_generated')

    if not self.slave_mode:
        # drop constraint and metadata table, they will be recreated when self.finish() is
        # called
        self._drop_all_constraints()
        self._drop_metatables_constraints()
    if source is None:
        source = cnx.repo.system_source
    self.source = source
11320
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
142 |
# URI related things ####################################################### |
11020
c8c8f6a6147f
[dataimport] massive_store: drop all constraints in __init__ and restore all in finish
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
11019
diff
changeset
|
143 |
|
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
144 |
def init_rtype_table(self, etype_from, rtype, etype_to):
    """ Build temporary table for standard rtype """
    # Create an uri_eid table for each etype for a better control of which etype is concerned by
    # a particular possibly multivalued relation.
    for end_etype in (etype_from, etype_to):
        if not end_etype or end_etype in self._init_uri_eid:
            continue
        self._init_uri_eid.add(end_etype)
        query = ('CREATE TABLE IF NOT EXISTS uri_eid_%(e)s'
                 '(uri character varying(%(size)s), eid integer)'
                 % {'e': end_etype.lower(), 'size': self.iid_maxsize})
        self.sql(query)
    if rtype in self._uri_rtypes:
        # temporary relation table already created for this rtype
        return
    if self.schema.rschema(rtype).inlined:
        self.logger.warning("inlined relation %s: cannot insert it", rtype)
    else:
        # Create the temporary table
        query = ('CREATE TABLE IF NOT EXISTS %(r)s_relation_iid_tmp'
                 '(uri_from character varying(%(s)s), uri_to character varying(%(s)s))'
                 % {'r': rtype, 's': self.iid_maxsize})
        self.sql(query)
        self._uri_rtypes.add(rtype)
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
164 |
def relate_by_iid(self, iid_from, rtype, iid_to): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
165 |
"""Add new relation based on the internal id (iid) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
166 |
of the entities (not the eid)""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
167 |
# Push data |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
168 |
if isinstance(iid_from, unicode): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
169 |
iid_from = iid_from.encode('utf-8') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
170 |
if isinstance(iid_to, unicode): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
171 |
iid_to = iid_to.encode('utf-8') |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
172 |
self._data_uri_relations[rtype].append({'uri_from': iid_from, 'uri_to': iid_to}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
173 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
174 |
def flush_relations(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
175 |
""" Flush the relations data |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
176 |
""" |
10856
b839167d99a4
[dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents:
10855
diff
changeset
|
177 |
for rtype, data in self._data_uri_relations.items(): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
178 |
if not data: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
179 |
self.logger.info('No data for rtype %s', rtype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
180 |
buf = StringIO('\n'.join(['%(uri_from)s\t%(uri_to)s' % d for d in data])) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
181 |
if not buf: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
182 |
self.logger.info('Empty Buffer for rtype %s', rtype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
183 |
continue |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
184 |
cursor = self._cnx.cnxset.cu |
11316
36c7cd362fc7
[dataimport] add a .schema shortcut attribute on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11315
diff
changeset
|
185 |
if not self.schema.rschema(rtype).inlined: |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
186 |
cursor.copy_from(buf, '%s_relation_iid_tmp' % rtype.lower(), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
187 |
null='NULL', columns=('uri_from', 'uri_to')) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
188 |
else: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
189 |
self.logger.warning("inlined relation %s: cannot insert it", rtype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
190 |
buf.close() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
191 |
# Clear data cache |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
192 |
self._data_uri_relations[rtype] = [] |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
193 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
194 |
def fill_uri_eid_table(self, etype, uri_label): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
195 |
""" Fill the uri_eid table |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
196 |
""" |
11319
fe90d07f3afa
[dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11318
diff
changeset
|
197 |
if etype not in self._uri_eid_inserted: |
fe90d07f3afa
[dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11318
diff
changeset
|
198 |
self._uri_eid_inserted.add(etype) |
fe90d07f3afa
[dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11318
diff
changeset
|
199 |
self.logger.info('Fill uri_eid for etype %s', etype) |
fe90d07f3afa
[dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11318
diff
changeset
|
200 |
self.sql('INSERT INTO uri_eid_%(e)s SELECT cw_%(l)s, cw_eid FROM cw_%(e)s' |
fe90d07f3afa
[dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11318
diff
changeset
|
201 |
% {'l': uri_label, 'e': etype.lower()}) |
fe90d07f3afa
[dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11318
diff
changeset
|
202 |
self.sql('CREATE INDEX uri_eid_%(e)s_idx ON uri_eid_%(e)s(uri)' |
fe90d07f3afa
[dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11318
diff
changeset
|
203 |
% {'e': etype.lower()}) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
204 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
205 |
def convert_relations(self, etype_from, rtype, etype_to, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
206 |
uri_label_from='cwuri', uri_label_to='cwuri'): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
207 |
""" Flush the converted relations |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
208 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
209 |
# Always flush relations to be sure |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
210 |
self.logger.info('Convert relations %s %s %s', etype_from, rtype, etype_to) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
211 |
self.flush_relations() |
11319
fe90d07f3afa
[dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11318
diff
changeset
|
212 |
if uri_label_from: |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
213 |
self.fill_uri_eid_table(etype_from, uri_label_from) |
11319
fe90d07f3afa
[dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11318
diff
changeset
|
214 |
if uri_label_to: |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
215 |
self.fill_uri_eid_table(etype_to, uri_label_to) |
11316
36c7cd362fc7
[dataimport] add a .schema shortcut attribute on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11315
diff
changeset
|
216 |
if self.schema.rschema(rtype).inlined: |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
217 |
self.logger.warning("Can't insert inlined relation %s", rtype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
218 |
return |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
219 |
if uri_label_from and uri_label_to: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
220 |
sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT O1.eid, O2.eid |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
221 |
FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(ef)s as O1, uri_eid_%(et)s as O2 |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
222 |
WHERE O1.uri=T.uri_from AND O2.uri=T.uri_to AND NOT EXISTS ( |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
223 |
SELECT 1 FROM %(r)s_relation AS TT WHERE TT.eid_from=O1.eid AND TT.eid_to=O2.eid); |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
224 |
''' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
225 |
elif uri_label_to: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
226 |
sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
227 |
CAST(T.uri_from AS INTEGER), O1.eid |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
228 |
FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(et)s as O1 |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
229 |
WHERE O1.uri=T.uri_to AND NOT EXISTS ( |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
230 |
SELECT 1 FROM %(r)s_relation AS TT WHERE |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
231 |
TT.eid_from=CAST(T.uri_from AS INTEGER) AND TT.eid_to=O1.eid); |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
232 |
''' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
233 |
elif uri_label_from: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
234 |
sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT O1.eid, T.uri_to |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
235 |
O1.eid, CAST(T.uri_to AS INTEGER) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
236 |
FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(ef)s as O1 |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
237 |
WHERE O1.uri=T.uri_from AND NOT EXISTS ( |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
238 |
SELECT 1 FROM %(r)s_relation AS TT WHERE |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
239 |
TT.eid_from=O1.eid AND TT.eid_to=CAST(T.uri_to AS INTEGER)); |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
240 |
''' |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
241 |
try: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
242 |
self.sql(sql % {'r': rtype.lower(), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
243 |
'et': etype_to.lower() if etype_to else u'', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
244 |
'ef': etype_from.lower() if etype_from else u''}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
245 |
except Exception as ex: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
246 |
self.logger.error("Can't insert relation %s: %s", rtype, ex) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
247 |
|
11313
682b15eb2dd2
[dataimport] flake8
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11312
diff
changeset
|
248 |
# SQL UTILITIES ######################################################### |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
249 |
|
11320
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
250 |
def _drop_all_constraints(self): |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
251 |
etypes_tables = ('cw_%s' % eschema.type.lower() for eschema in self.schema.entities() |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
252 |
if not eschema.final) |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
253 |
rtypes_tables = ('%s_relation' % rschema.type.lower() for rschema in self.schema.relations() |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
254 |
if rschema_has_table(rschema, skip_relations=PURE_VIRTUAL_RTYPES)) |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
255 |
# Create a table to save the constraints, it allows reloading even after crash |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
256 |
self.sql('CREATE TABLE IF NOT EXISTS cwmassive_constraints' |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
257 |
'(origtable text, query text, type varchar(256))') |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
258 |
for tablename in chain(etypes_tables, rtypes_tables, ('entities',)): |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
259 |
constraints = self._dbh.table_constraints(tablename) |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
260 |
for name, query in constraints.items(): |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
261 |
self.sql('INSERT INTO cwmassive_constraints VALUES (%(e)s, %(c)s, %(t)s)', |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
262 |
{'e': tablename, 'c': query, 't': 'constraint'}) |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
263 |
self.sql('ALTER TABLE %s DROP CONSTRAINT %s' % (tablename, name)) |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
264 |
|
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
265 |
def _reapply_all_constraints(self): |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
266 |
if not self._dbh.table_exists('cwmassive_constraints'): |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
267 |
self.logger.info('The table cwmassive_constraints does not exist') |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
268 |
return |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
269 |
cu = self.sql("SELECT query FROM cwmassive_constraints WHERE type='constraint'") |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
270 |
for query, in cu.fetchall(): |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
271 |
self.sql(query) |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
272 |
self.sql("DELETE FROM cwmassive_constraints WHERE type='constraint' AND query=%(q)s", |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
273 |
{'q': query}) |
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
274 |
|
11022
1032d7956b11
[dataimport] massive_store: do not drop constraints multiple times
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
11021
diff
changeset
|
275 |
def drop_and_store_indexes(self, tablename): |
11311
fd45fc498c1b
[dataimport] use IF EXISTS when possible
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11310
diff
changeset
|
276 |
"""Drop indexes and constraints""" |
fd45fc498c1b
[dataimport] use IF EXISTS when possible
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11310
diff
changeset
|
277 |
# Create a table to save the constraints, it allows reloading even after crash |
fd45fc498c1b
[dataimport] use IF EXISTS when possible
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11310
diff
changeset
|
278 |
self.sql('CREATE TABLE IF NOT EXISTS cwmassive_constraints' |
fd45fc498c1b
[dataimport] use IF EXISTS when possible
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11310
diff
changeset
|
279 |
'(origtable text, query text, type varchar(256))') |
11022
1032d7956b11
[dataimport] massive_store: do not drop constraints multiple times
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
11021
diff
changeset
|
280 |
self._drop_table_indexes(tablename) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
281 |
|
11022
1032d7956b11
[dataimport] massive_store: do not drop constraints multiple times
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
11021
diff
changeset
|
282 |
def _drop_table_indexes(self, tablename): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
283 |
""" Drop and store table constraints and indexes """ |
11314
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
284 |
indexes = self._dbh.table_indexes(tablename) |
10856
b839167d99a4
[dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents:
10855
diff
changeset
|
285 |
for name, query in indexes.items(): |
11312
3a83759854ee
[dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11311
diff
changeset
|
286 |
self.sql('INSERT INTO cwmassive_constraints VALUES (%(e)s, %(c)s, %(t)s)', |
3a83759854ee
[dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11311
diff
changeset
|
287 |
{'e': tablename, 'c': query, 't': 'index'}) |
3a83759854ee
[dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11311
diff
changeset
|
288 |
self.sql('DROP INDEX %s' % name) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
289 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
290 |
def reapply_constraint_index(self, tablename): |
10878
fda5e42037a9
[dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents:
10877
diff
changeset
|
291 |
if not self._dbh.table_exists('cwmassive_constraints'): |
fda5e42037a9
[dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents:
10877
diff
changeset
|
292 |
self.logger.info('The table cwmassive_constraints does not exist') |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
293 |
return |
11312
3a83759854ee
[dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11311
diff
changeset
|
294 |
cu = self.sql('SELECT query FROM cwmassive_constraints WHERE origtable = %(e)s', |
3a83759854ee
[dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11311
diff
changeset
|
295 |
{'e': tablename}) |
3a83759854ee
[dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11311
diff
changeset
|
296 |
for query, in cu.fetchall(): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
297 |
self.sql(query) |
10878
fda5e42037a9
[dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents:
10877
diff
changeset
|
298 |
self.sql('DELETE FROM cwmassive_constraints WHERE origtable = %(e)s ' |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
299 |
'AND query = %(q)s', {'e': tablename, 'q': query}) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
300 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
301 |
def _drop_metatables_constraints(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
302 |
""" Drop all the constraints for the meta data""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
303 |
for tablename in ('created_by_relation', 'owned_by_relation', |
10861
6205b89c3af6
[massive store] identity_relation should not be considered, is_relation seems missing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10859
diff
changeset
|
304 |
'is_instance_of_relation', 'is_relation', |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
305 |
'entities'): |
11022
1032d7956b11
[dataimport] massive_store: do not drop constraints multiple times
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
11021
diff
changeset
|
306 |
self.drop_and_store_indexes(tablename) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
307 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
308 |
def _create_metatables_constraints(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
309 |
""" Create all the constraints for the meta data""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
310 |
for tablename in ('entities', |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
311 |
'created_by_relation', 'owned_by_relation', |
10861
6205b89c3af6
[massive store] identity_relation should not be considered, is_relation seems missing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10859
diff
changeset
|
312 |
'is_instance_of_relation', 'is_relation'): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
313 |
# Indexes and constraints |
10869
575982c948a9
[dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
10867
diff
changeset
|
314 |
self.reapply_constraint_index(tablename) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
315 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
316 |
def init_relation_table(self, rtype): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
317 |
""" Get and remove all indexes for performance sake """ |
11318
09731bd52887
[dataimport] use a single _initialized set instead of _entities / _rtypes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11317
diff
changeset
|
318 |
if not self.slave_mode and rtype not in self._initialized: |
09731bd52887
[dataimport] use a single _initialized set instead of _entities / _rtypes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11317
diff
changeset
|
319 |
self._initialized.add(rtype) |
11312
3a83759854ee
[dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11311
diff
changeset
|
320 |
self.sql('CREATE TABLE %s_relation_tmp (eid_from integer, eid_to integer)' |
3a83759854ee
[dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11311
diff
changeset
|
321 |
% rtype.lower()) |
10869
575982c948a9
[dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
10867
diff
changeset
|
322 |
# Drop indexes and constraints |
11318
09731bd52887
[dataimport] use a single _initialized set instead of _entities / _rtypes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11317
diff
changeset
|
323 |
self.drop_and_store_indexes('%s_relation' % rtype.lower()) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
324 |
# Push the etype in the initialized table for easier restart |
11318
09731bd52887
[dataimport] use a single _initialized set instead of _entities / _rtypes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11317
diff
changeset
|
325 |
self.sql('CREATE TABLE IF NOT EXISTS cwmassive_initialized' |
09731bd52887
[dataimport] use a single _initialized set instead of _entities / _rtypes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11317
diff
changeset
|
326 |
'(retype text, type varchar(128))') |
09731bd52887
[dataimport] use a single _initialized set instead of _entities / _rtypes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11317
diff
changeset
|
327 |
self.sql("INSERT INTO cwmassive_initialized VALUES (%(e)s, 'rtype')", {'e': rtype}) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
328 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
329 |
def init_etype_table(self, etype): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
330 |
""" Add eid sequence to a particular etype table and |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
331 |
remove all indexes for performance sake """ |
11318
09731bd52887
[dataimport] use a single _initialized set instead of _entities / _rtypes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11317
diff
changeset
|
332 |
if not self.slave_mode and etype not in self._initialized: |
09731bd52887
[dataimport] use a single _initialized set instead of _entities / _rtypes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11317
diff
changeset
|
333 |
self._initialized.add(etype) |
09731bd52887
[dataimport] use a single _initialized set instead of _entities / _rtypes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11317
diff
changeset
|
334 |
# Drop indexes and constraints |
09731bd52887
[dataimport] use a single _initialized set instead of _entities / _rtypes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11317
diff
changeset
|
335 |
self.drop_and_store_indexes('cw_%s' % etype.lower()) |
09731bd52887
[dataimport] use a single _initialized set instead of _entities / _rtypes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11317
diff
changeset
|
336 |
# Push the rtype in the initialized table for easier restart |
09731bd52887
[dataimport] use a single _initialized set instead of _entities / _rtypes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11317
diff
changeset
|
337 |
self.sql('CREATE TABLE IF NOT EXISTS cwmassive_initialized' |
09731bd52887
[dataimport] use a single _initialized set instead of _entities / _rtypes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11317
diff
changeset
|
338 |
'(retype text, type varchar(128))') |
09731bd52887
[dataimport] use a single _initialized set instead of _entities / _rtypes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11317
diff
changeset
|
339 |
self.sql("INSERT INTO cwmassive_initialized VALUES (%(e)s, 'etype')", {'e': etype}) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
340 |
|
11026
ce9b3886955d
[dataimport] remove eids_seq_start attribute from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
11025
diff
changeset
|
341 |
def restart_eid_sequence(self, start_eid): |
ce9b3886955d
[dataimport] remove eids_seq_start attribute from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
11025
diff
changeset
|
342 |
self._cnx.system_sql(self._cnx.repo.system_source.dbhelper.sql_restart_numrange( |
ce9b3886955d
[dataimport] remove eids_seq_start attribute from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
11025
diff
changeset
|
343 |
'entities_id_seq', initial_value=start_eid)) |
ce9b3886955d
[dataimport] remove eids_seq_start attribute from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
11025
diff
changeset
|
344 |
self._cnx.commit() |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
345 |
|
11313
682b15eb2dd2
[dataimport] flake8
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11312
diff
changeset
|
346 |
# ENTITIES CREATION ##################################################### |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
347 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
348 |
def _get_eid_gen(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
349 |
""" Function getting the next eid. This is done by preselecting |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
350 |
a given number of eids from the 'entities_id_seq', and then |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
351 |
storing them""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
352 |
while True: |
10872
ff4f94cfa2fb
[dataimport] turn eids_seq_{start,range} into class attributes
Julien Cristau <julien.cristau@logilab.fr>
parents:
10871
diff
changeset
|
353 |
last_eid = self._cnx.repo.system_source.create_eid(self._cnx, self.eids_seq_range) |
ff4f94cfa2fb
[dataimport] turn eids_seq_{start,range} into class attributes
Julien Cristau <julien.cristau@logilab.fr>
parents:
10871
diff
changeset
|
354 |
for eid in range(last_eid - self.eids_seq_range + 1, last_eid + 1): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
355 |
yield eid |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
356 |
|
10864
b7f4acf0473b
[dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10863
diff
changeset
|
357 |
def _apply_default_values(self, etype, kwargs): |
b7f4acf0473b
[dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10863
diff
changeset
|
358 |
"""Apply the default values for a given etype, attribute and value.""" |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
359 |
default_values = self.default_values[etype] |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
360 |
missing_keys = set(default_values) - set(kwargs) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
361 |
kwargs.update((key, default_values[key]) for key in missing_keys) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
362 |
|
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
363 |
# store api ################################################################ |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
364 |
|
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
365 |
def prepare_insert_entity(self, etype, **kwargs): |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
366 |
"""Given an entity type, attributes and inlined relations, returns the inserted entity's |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
367 |
eid. |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
368 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
369 |
# Init the table if necessary |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
370 |
self.init_etype_table(etype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
371 |
# Add meta data if not given |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
372 |
if 'modification_date' not in kwargs: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
373 |
kwargs['modification_date'] = self._now |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
374 |
if 'creation_date' not in kwargs: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
375 |
kwargs['creation_date'] = self._now |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
376 |
if 'cwuri' not in kwargs: |
10874
538e17174769
[dataimport] remove uri_param_name from MassiveObjectStore parameters
Julien Cristau <julien.cristau@logilab.fr>
parents:
10873
diff
changeset
|
377 |
kwargs['cwuri'] = self._default_cwuri + str(self._count_cwuri) |
538e17174769
[dataimport] remove uri_param_name from MassiveObjectStore parameters
Julien Cristau <julien.cristau@logilab.fr>
parents:
10873
diff
changeset
|
378 |
self._count_cwuri += 1 |
11027
ec5eeb08f2e8
[dataimport] stop supporting None as eids_seq_range
Julien Cristau <julien.cristau@logilab.fr>
parents:
11026
diff
changeset
|
379 |
if 'eid' not in kwargs: |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
380 |
# If eid is not given and the eids sequence is set, |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
381 |
# use the value from the sequence |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
382 |
kwargs['eid'] = self.get_next_eid() |
10864
b7f4acf0473b
[dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10863
diff
changeset
|
383 |
self._apply_default_values(etype, kwargs) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
384 |
self._data_entities[etype].append(kwargs) |
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
385 |
return kwargs.get('eid') |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
386 |
|
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
387 |
def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs): |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
388 |
"""Insert into the database a relation ``rtype`` between entities with eids ``eid_from`` |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
389 |
and ``eid_to``. |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
390 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
391 |
# Init the table if necessary |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
392 |
self.init_relation_table(rtype) |
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
393 |
self._data_relations[rtype].append({'eid_from': eid_from, 'eid_to': eid_to}) |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
394 |
|
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
395 |
def flush(self): |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
396 |
"""Flush the data""" |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
397 |
self.flush_entities() |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
398 |
self.flush_internal_relations() |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
399 |
self.flush_relations() |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
400 |
|
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
401 |
def commit(self): |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
402 |
"""Commit the database transaction.""" |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
403 |
self.on_commit() |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
404 |
super(MassiveObjectStore, self).commit() |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
405 |
|
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
406 |
def finish(self): |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
407 |
"""Remove temporary tables and columns.""" |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
408 |
self.logger.info("Start cleaning") |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
409 |
if self.slave_mode: |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
410 |
raise RuntimeError('Store cleanup is not allowed in slave mode') |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
411 |
self.logger.info("Start cleaning") |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
412 |
# Cleanup relations tables |
10879
3193d9ede8dd
[dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents:
10878
diff
changeset
|
413 |
for etype in self._init_uri_eid: |
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
414 |
self.sql('DROP TABLE uri_eid_%s' % etype.lower()) |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
415 |
# Remove relations tables |
10879
3193d9ede8dd
[dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents:
10878
diff
changeset
|
416 |
for rtype in self._uri_rtypes: |
11319
fe90d07f3afa
[dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11318
diff
changeset
|
417 |
self.sql('DROP TABLE %(r)s_relation_iid_tmp' % {'r': rtype}) |
11020
c8c8f6a6147f
[dataimport] massive_store: drop all constraints in __init__ and restore all in finish
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
11019
diff
changeset
|
418 |
# Create meta constraints (entities, is_instance_of, ...) |
c8c8f6a6147f
[dataimport] massive_store: drop all constraints in __init__ and restore all in finish
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents:
11019
diff
changeset
|
419 |
self._create_metatables_constraints() |
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
420 |
# Get all the initialized etypes/rtypes |
10878
fda5e42037a9
[dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents:
10877
diff
changeset
|
421 |
if self._dbh.table_exists('cwmassive_initialized'): |
11312
3a83759854ee
[dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11311
diff
changeset
|
422 |
cu = self.sql('SELECT retype, type FROM cwmassive_initialized') |
3a83759854ee
[dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11311
diff
changeset
|
423 |
for retype, _type in cu.fetchall(): |
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
424 |
self.logger.info('Cleanup for %s' % retype) |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
425 |
if _type == 'etype': |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
426 |
# Cleanup entities tables - Recreate indexes |
11319
fe90d07f3afa
[dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11318
diff
changeset
|
427 |
self.reapply_constraint_index('cw_%s' % etype.lower()) |
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
428 |
elif _type == 'rtype': |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
429 |
# Cleanup relations tables |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
430 |
self._cleanup_relations(retype) |
10878
fda5e42037a9
[dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents:
10877
diff
changeset
|
431 |
self.sql('DELETE FROM cwmassive_initialized WHERE retype = %(e)s', |
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
432 |
{'e': retype}) |
11320
78da04c853dc
[dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11319
diff
changeset
|
433 |
self._reapply_all_constraints() |
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
434 |
# Delete the meta data table |
10878
fda5e42037a9
[dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents:
10877
diff
changeset
|
435 |
for table_name in ('cwmassive_initialized', 'cwmassive_constraints', 'cwmassive_metadata'): |
11311
fd45fc498c1b
[dataimport] use IF EXISTS when possible
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11310
diff
changeset
|
436 |
self.sql('DROP TABLE IF EXISTS %s' % table_name) |
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
437 |
self.commit() |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
438 |
|
11313
682b15eb2dd2
[dataimport] flake8
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11312
diff
changeset
|
439 |
# FLUSH ################################################################# |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
440 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
441 |
def on_commit(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
442 |
if self.on_commit_callback: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
443 |
self.on_commit_callback() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
444 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
445 |
def on_rollback(self, exc, etype, data): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
446 |
if self.on_rollback_callback: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
447 |
self.on_rollback_callback(exc, etype, data) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
448 |
self._cnx.rollback() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
449 |
else: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
450 |
raise exc |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
451 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
452 |
def flush_internal_relations(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
453 |
""" Flush the relations data |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
454 |
""" |
10856
b839167d99a4
[dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents:
10855
diff
changeset
|
455 |
for rtype, data in self._data_relations.items(): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
456 |
if not data: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
457 |
# There is no data for these etype for this flush round. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
458 |
continue |
10871
1d4a94d04ec6
[dataimport] remove replace_sep parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
10870
diff
changeset
|
459 |
buf = pgstore._create_copyfrom_buffer(data, ('eid_from', 'eid_to')) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
460 |
if not buf: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
461 |
# The buffer is empty. This is probably due to error in _create_copyfrom_buffer |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
462 |
raise ValueError |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
463 |
cursor = self._cnx.cnxset.cu |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
464 |
# Push into the tmp table |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
465 |
cursor.copy_from(buf, '%s_relation_tmp' % rtype.lower(), |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
466 |
null='NULL', columns=('eid_from', 'eid_to')) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
467 |
# Clear data cache |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
468 |
self._data_relations[rtype] = [] |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
469 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
470 |
def flush_entities(self): |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
471 |
""" Flush the entities data |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
472 |
""" |
10856
b839167d99a4
[dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents:
10855
diff
changeset
|
473 |
for etype, data in self._data_entities.items(): |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
474 |
if not data: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
475 |
# There is no data for these etype for this flush round. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
476 |
continue |
11305
118d83e65ca8
[dataimport] remove useless assignment in massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11057
diff
changeset
|
477 |
# XXX It may be interresting to directly infer the columns' names from the schema |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
478 |
# XXX For now, the _create_copyfrom_buffer does a "row[column]" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
479 |
# which can lead to a key error. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
480 |
# Thus we should create dictionary with all the keys. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
481 |
columns = set() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
482 |
for d in data: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
483 |
columns.update(d.keys()) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
484 |
_data = [] |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
485 |
_base_data = dict.fromkeys(columns) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
486 |
for d in data: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
487 |
_d = _base_data.copy() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
488 |
_d.update(d) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
489 |
_data.append(_d) |
10871
1d4a94d04ec6
[dataimport] remove replace_sep parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
10870
diff
changeset
|
490 |
buf = pgstore._create_copyfrom_buffer(_data, columns) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
491 |
if not buf: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
492 |
# The buffer is empty. This is probably due to error in _create_copyfrom_buffer |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
493 |
raise ValueError('Error in buffer creation for etype %s' % etype) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
494 |
columns = ['cw_%s' % attr for attr in columns] |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
495 |
cursor = self._cnx.cnxset.cu |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
496 |
try: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
497 |
cursor.copy_from(buf, 'cw_%s' % etype.lower(), null='NULL', columns=columns) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
498 |
except Exception as exc: |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
499 |
self.on_rollback(exc, etype, data) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
500 |
# Clear data cache |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
501 |
self._data_entities[etype] = [] |
11025
5413ab08617d
[dataimport] massive store in slave mode shouldn't flush metadata
Julien Cristau <julien.cristau@logilab.fr>
parents:
11023
diff
changeset
|
502 |
if not self.slave_mode: |
5413ab08617d
[dataimport] massive store in slave mode shouldn't flush metadata
Julien Cristau <julien.cristau@logilab.fr>
parents:
11023
diff
changeset
|
503 |
self.flush_meta_data() |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
504 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
505 |
def flush_meta_data(self):
    """Flush the meta data (entities table, is_instance table, ...) for every
    initialized entity type that has not been flushed yet.

    Raises:
        RuntimeError: when called in slave mode; flushing meta data is the
            master store's responsibility.
    """
    if self.slave_mode:
        # fixed typo in the original message ("is not allow")
        raise RuntimeError('Flushing meta data is not allowed in slave mode')
    if not self._dbh.table_exists('cwmassive_initialized'):
        self.logger.info('No information available for initialized etypes/rtypes')
        return
    # Record flushed etypes in the database so an interrupted/resumed import
    # does not insert meta data twice for the same entity type.
    self.sql('CREATE TABLE IF NOT EXISTS cwmassive_metadata (etype text)')
    cu = self.sql('SELECT etype FROM cwmassive_metadata')
    already_flushed = set(e for e, in cu.fetchall())
    cu = self.sql('SELECT retype FROM cwmassive_initialized WHERE type = %(t)s',
                  {'t': 'etype'})
    all_etypes = set(e for e, in cu.fetchall())
    for etype in all_etypes:
        if etype not in already_flushed:
            # Deals with meta data
            self.logger.info('Flushing meta data for %s' % etype)
            self.insert_massive_meta_data(etype)
            self.sql('INSERT INTO cwmassive_metadata VALUES (%(e)s)', {'e': etype})
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
526 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
527 |
def _cleanup_relations(self, rtype):
    """Move rows from the temporary relation table into the final rtype table
    and restore indexes/constraints on it.
    """
    # Copy into the final relation table, skipping rows already present
    # (DISTINCT + NOT EXISTS removes duplicates).
    subst = {'r': rtype}
    self.sql('INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT '
             'T.eid_from, T.eid_to FROM %(r)s_relation_tmp AS T '
             'WHERE NOT EXISTS (SELECT 1 FROM %(r)s_relation AS TT WHERE '
             'TT.eid_from=T.eid_from AND TT.eid_to=T.eid_to);' % subst)
    # The temporary table is no longer needed.
    self.sql('DROP TABLE %(r)s_relation_tmp' % {'r': rtype.lower()})
    # Recreate indexes and constraints that were dropped for the massive import.
    self.reapply_constraint_index('%s_relation' % rtype.lower())
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
539 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
540 |
def insert_massive_meta_data(self, etype):
    """Massive insertion of meta data for a given etype, based on SQL statements.

    Fills created_by/owned_by/cw_source/is/is_instance_of relation tables and
    the `entities` table for every imported entity of `etype`.
    """
    # Meta data relations
    self.metagen_push_relation(etype, self._cnx.user.eid, 'created_by_relation')
    self.metagen_push_relation(etype, self._cnx.user.eid, 'owned_by_relation')
    self.metagen_push_relation(etype, self.source.eid, 'cw_source_relation')
    # `eschema` must be the schema object itself (not its eid): its `.eid`
    # and `.ancestors()` attributes are used below.
    eschema = self.schema[etype]
    self.metagen_push_relation(etype, eschema.eid, 'is_relation')
    for parent_eschema in eschema.ancestors() + [eschema]:
        self.metagen_push_relation(etype, parent_eschema.eid, 'is_instance_of_relation')
    self.sql("INSERT INTO entities (eid, type, asource, extid) "
             "SELECT cw_eid, '%s', 'system', NULL FROM cw_%s "
             "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
             % (etype, etype.lower()))
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
557 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
558 |
def metagen_push_relation(self, etype, eid_to, rtype):
    """Relate every imported entity of `etype` to `eid_to` through `rtype`,
    skipping entities already recorded in the `entities` table.
    """
    query = ("INSERT INTO %s (eid_from, eid_to) SELECT cw_eid, %s FROM cw_%s "
             "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
             % (rtype, eid_to, etype.lower()))
    self.sql(query)
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
562 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
563 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
564 |
def get_size_constraints(schema):
    """analyzes yams ``schema`` and returns the list of size constraints.

    The returned value is a dictionary mapping entity types to a
    sub-dictionnaries mapping attribute names -> max size.
    """
    size_constraints = {}
    for eschema in schema.entities():
        # one sub-dictionary per entity type
        attr_sizes = size_constraints[eschema.type] = {}
        for rschema, aschema in eschema.attribute_definitions():
            # record the max size of every attribute carrying a SizeConstraint
            rdef = rschema.rdef(eschema, aschema)
            for constraint in rdef.constraints:
                if isinstance(constraint, SizeConstraint):
                    attr_sizes[rschema.type] = constraint.max
    return size_constraints
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
585 |
|
11313
682b15eb2dd2
[dataimport] flake8
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11312
diff
changeset
|
586 |
|
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
587 |
def get_default_values(schema):
    """analyzes yams ``schema`` and returns the list of default values.

    The returned value is a dictionary mapping entity types to a
    sub-dictionnaries mapping attribute names -> default values.
    """
    default_values = {}
    # iterates on all entity types
    for eschema in schema.entities():
        # for each entity type, iterates on attribute definitions
        default_values[eschema.type] = eschema_constraints = {}
        for rschema, _ in eschema.attribute_definitions():
            # record the attribute's default value, if any; call the schema
            # only once per attribute (the original called it twice)
            default = eschema.default(rschema.type)
            if default is not None:
                eschema_constraints[rschema.type] = default
    return default_values
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
604 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
605 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
606 |
class PGHelper(object): |
11314
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
607 |
"""This class provides some helper methods to manipulate a postgres database metadata (index and |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
608 |
constraints). |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
609 |
""" |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
610 |
|
11310
e0b7277e5394
[dataimport] PGHelper should be responsible to retrieve the database schema
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11308
diff
changeset
|
611 |
def __init__(self, cnx):
    # shortcut to run raw SQL against the system source
    self.sql = cnx.system_sql
    # Deals with pg schema, see #3216686
    self.pg_schema = (cnx.repo.config.system_source_config.get('db-namespace')
                      or 'public')
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
616 |
|
11314
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
617 |
def table_exists(self, tablename):
    """Return True if the given table already exists in the database."""
    rows = self.sql('SELECT 1 from information_schema.tables '
                    'WHERE table_name=%(t)s AND table_schema=%(s)s',
                    {'t': tablename, 's': self.pg_schema})
    # fetchone() yields a row when the table exists, None otherwise
    return rows.fetchone() is not None
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
623 |
|
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
624 |
def table_indexes_constraints(self, tablename):
    """Return one dictionary with all indexes by name, another with all constraints by name,
    for the given table.
    """
    constraints = self.table_constraints(tablename)
    # Skip pkey indexes: they are automatically created along with their
    # constraint, see #3224079.
    indexes = dict((name, query)
                   for name, query in self.table_indexes(tablename).items()
                   if name not in constraints)
    return indexes, constraints
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
637 |
|
11314
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
638 |
def table_indexes(self, tablename):
    """Return a dictionary of indexes {index name: index sql}, constraints included."""
    return dict((name, self._index_sql(name))
                for name in self._index_names(tablename))
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
644 |
|
11314
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
645 |
def table_constraints(self, tablename):
    """Return a dictionary of constraints {constraint name: constraint sql}."""
    result = {}
    for name in self._constraint_names(tablename):
        # rebuild a full ALTER TABLE statement from the constraint definition
        result[name] = 'ALTER TABLE %s ADD CONSTRAINT %s %s' % (
            tablename, name, self._constraint_sql(name))
    return result
11314
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
652 |
|
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
653 |
def _index_names(self, tablename):
    """Return the names of all indexes in the given table (including constraints.)"""
    # Query the postgres catalog for index relations (relkind 'i') attached
    # to the given table.  Primary-key indexes are excluded
    # (i.indisprimary = FALSE): they are created along with their constraint.
    cu = self.sql("SELECT c.relname FROM pg_catalog.pg_class c "
                  "JOIN pg_catalog.pg_index i ON i.indexrelid = c.oid "
                  "JOIN pg_catalog.pg_class c2 ON i.indrelid = c2.oid "
                  "LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner "
                  "LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace "
                  "WHERE c.relkind IN ('i','') "
                  " AND c2.relname = %(t)s "
                  " AND i.indisprimary = FALSE "
                  " AND n.nspname NOT IN ('pg_catalog', 'pg_toast') "
                  " AND pg_catalog.pg_table_is_visible(c.oid);", {'t': tablename})
    # each row is a 1-tuple (relname,)
    return [name for name, in cu.fetchall()]
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
666 |
|
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
667 |
def _constraint_names(self, tablename):
    """Return the names of all constraints in the given table."""
    # Query the postgres catalog (pg_constraint) for constraints attached to
    # the given table.
    cu = self.sql("SELECT i.conname FROM pg_catalog.pg_class c "
                  "JOIN pg_catalog.pg_constraint i ON i.conrelid = c.oid "
                  "JOIN pg_catalog.pg_class c2 ON i.conrelid=c2.oid "
                  "LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner "
                  "LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace "
                  "WHERE c2.relname = %(t)s "
                  "AND n.nspname NOT IN ('pg_catalog', 'pg_toast') "
                  "AND pg_catalog.pg_table_is_visible(c.oid)", {'t': tablename})
    # each row is a 1-tuple (conname,)
    return [name for name, in cu.fetchall()]
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
678 |
|
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
679 |
def _index_sql(self, name):
    """Return the SQL statement that recreates the index named `name`.

    The definition is fetched from the PostgreSQL system catalog
    (`pg_get_indexdef`), restricted to the schema held in
    `self.pg_schema`.
    """
    query = ('SELECT pg_get_indexdef(c.oid) FROM pg_catalog.pg_class c '
             'LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace '
             'WHERE c.relname = %(r)s AND n.nspname=%(n)s')
    # a single row is expected: index names are unique within a schema
    cursor = self.sql(query, {'r': name, 'n': self.pg_schema})
    return cursor.fetchone()[0]
def _constraint_sql(self, name):
    """Return the SQL statement that recreates the constraint named `name`.

    The definition is fetched from the PostgreSQL system catalog
    (`pg_get_constraintdef`), restricted to the schema held in
    `self.pg_schema`.
    """
    query = ('SELECT pg_get_constraintdef(c.oid) FROM pg_catalog.pg_constraint c '
             'LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.connamespace '
             'WHERE c.conname = %(r)s AND n.nspname=%(n)s')
    # a single row is expected: constraint names are unique within a schema
    cursor = self.sql(query, {'r': name, 'n': self.pg_schema})
    return cursor.fetchone()[0]