author | Sylvain Thénault <sylvain.thenault@logilab.fr> |
Wed, 05 Oct 2016 09:38:05 +0200 | |
changeset 11784 | c1aa50a88de3 |
parent 11783 | 8865c9e55575 |
child 11785 | 0cea67f41d0c |
permissions | -rw-r--r-- |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
1 |
# coding: utf-8 |
11305
118d83e65ca8
[dataimport] remove useless assignment in massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11057
diff
changeset
|
2 |
# copyright 2015-2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
3 |
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
4 |
# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
5 |
# This file is part of CubicWeb. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
6 |
# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
7 |
# CubicWeb is free software: you can redistribute it and/or modify it under the |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
8 |
# terms of the GNU Lesser General Public License as published by the Free |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
9 |
# Software Foundation, either version 2.1 of the License, or (at your option) |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
10 |
# any later version. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
11 |
# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
12 |
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT ANY |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
13 |
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
14 |
# A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
15 |
# details. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
16 |
# |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
17 |
# You should have received a copy of the GNU Lesser General Public License along |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
18 |
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
19 |
|
11784
c1aa50a88de3
[massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11783
diff
changeset
|
20 |
from base64 import b64encode |
11326
06eeac9389a3
[dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11325
diff
changeset
|
21 |
from copy import copy |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
22 |
from collections import defaultdict |
11316
36c7cd362fc7
[dataimport] add a .schema shortcut attribute on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11315
diff
changeset
|
23 |
from itertools import chain |
11784
c1aa50a88de3
[massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11783
diff
changeset
|
24 |
import logging |
c1aa50a88de3
[massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11783
diff
changeset
|
25 |
from uuid import uuid4 |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
26 |
|
11784
c1aa50a88de3
[massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11783
diff
changeset
|
27 |
from six import text_type |
10859
375a8232e61c
[dataimport] import range from six.moves
Julien Cristau <julien.cristau@logilab.fr>
parents:
10856
diff
changeset
|
28 |
from six.moves import range |
375a8232e61c
[dataimport] import range from six.moves
Julien Cristau <julien.cristau@logilab.fr>
parents:
10856
diff
changeset
|
29 |
|
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
30 |
from cubicweb.dataimport import stores, pgstore |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
31 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
32 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
33 |
class MassiveObjectStore(stores.RQLObjectStore): |
11331
f2ff82dfcd5c
[dataimport] add a bit of extra-documentation on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11330
diff
changeset
|
34 |
"""Store for massive import of data, with delayed insertion of meta data. |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
35 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
36 |
WARNINGS: |
11315
ad826d81e88e
[dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11314
diff
changeset
|
37 |
|
ad826d81e88e
[dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11314
diff
changeset
|
38 |
- This store may only be used with PostgreSQL for now, as it relies |
ad826d81e88e
[dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11314
diff
changeset
|
39 |
on the COPY FROM method, and on specific PostgreSQL tables to get all |
ad826d81e88e
[dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11314
diff
changeset
|
40 |
the indexes. |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
41 |
|
11315
ad826d81e88e
[dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11314
diff
changeset
|
42 |
- This store can only insert relations that are not inlined (i.e., |
11707
2c4518fea26f
[massive store] Drop deprecated code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11705
diff
changeset
|
43 |
which do *not* have inlined=True in their definition in the schema), |
2c4518fea26f
[massive store] Drop deprecated code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11705
diff
changeset
|
44 |
unless they are specified as entity attributes. |
11315
ad826d81e88e
[dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11314
diff
changeset
|
45 |
|
ad826d81e88e
[dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11314
diff
changeset
|
46 |
It should be used as follows: |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
47 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
48 |
store = MassiveObjectStore(cnx) |
11707
2c4518fea26f
[massive store] Drop deprecated code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11705
diff
changeset
|
49 |
eid_p = store.prepare_insert_entity('Person', |
11783
8865c9e55575
[massive store] docstring and __init__ cleanup
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11782
diff
changeset
|
50 |
cwuri=u'http://dbpedia.org/toto', |
8865c9e55575
[massive store] docstring and __init__ cleanup
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11782
diff
changeset
|
51 |
name=u'Toto') |
11707
2c4518fea26f
[massive store] Drop deprecated code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11705
diff
changeset
|
52 |
eid_loc = store.prepare_insert_entity('Location', |
11783
8865c9e55575
[massive store] docstring and __init__ cleanup
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11782
diff
changeset
|
53 |
cwuri=u'http://geonames.org/11111', |
8865c9e55575
[massive store] docstring and __init__ cleanup
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11782
diff
changeset
|
54 |
name=u'Somewhere') |
11707
2c4518fea26f
[massive store] Drop deprecated code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11705
diff
changeset
|
55 |
store.prepare_insert_relation(eid_p, 'lives_in', eid_loc) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
56 |
store.flush() |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
57 |
... |
10882
634cc266e48f
[dataimport] fix doc string to stop referring to the old API
Julien Cristau <julien.cristau@logilab.fr>
parents:
10881
diff
changeset
|
58 |
store.commit() |
634cc266e48f
[dataimport] fix doc string to stop referring to the old API
Julien Cristau <julien.cristau@logilab.fr>
parents:
10881
diff
changeset
|
59 |
store.finish() |
11331
f2ff82dfcd5c
[dataimport] add a bit of extra-documentation on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11330
diff
changeset
|
60 |
|
f2ff82dfcd5c
[dataimport] add a bit of extra-documentation on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11330
diff
changeset
|
61 |
Full-text indexation is not handled, you'll have to reindex the proper entity types by yourself |
f2ff82dfcd5c
[dataimport] add a bit of extra-documentation on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11330
diff
changeset
|
62 |
if desired. |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
63 |
""" |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
64 |
|
10875
75d1b2d66f18
[dataimport] remove autoflush_metadata from MassiveObjectStore parameters
Julien Cristau <julien.cristau@logilab.fr>
parents:
10874
diff
changeset
|
65 |
def __init__(self, cnx,
             on_commit_callback=None, on_rollback_callback=None,
             slave_mode=False,
             eids_seq_range=10000,
             metagen=None):
    """Set up a massive store working through the `cnx` repository connection.

    Accepted arguments:

    - `cnx`, a connection to the repository
    - `on_commit_callback` and `on_rollback_callback`, optional values which are simply kept
      as attributes of the store by this method
    - `slave_mode`, a store created with `slave_mode=True` starts with the metadata
      constraints flagged as already dropped
    - `eids_seq_range`: size of eid range reserved by the store for each batch
    - `metagen`, optional :class:`MetadataGenerator` instance, a default one is built from
      `cnx` when omitted
    """
    super(MassiveObjectStore, self).__init__(cnx)

    # identifier of this store instance: a random uuid as 32 hexadecimal digits (no dashes)
    self.uuid = text_type(uuid4().hex)
    # plain copy of the configuration received as arguments
    self.eids_seq_range = eids_seq_range
    self.slave_mode = slave_mode
    self.on_rollback_callback = on_rollback_callback
    self.on_commit_callback = on_commit_callback
    self.metagen = stores.MetadataGenerator(cnx) if metagen is None else metagen

    self.logger = logging.getLogger('dataimport.massive_store')
    # shortcuts to objects reachable from the connection
    self.sql = cnx.system_sql
    self.schema = cnx.vreg.schema
    self.default_values = get_default_values(self.schema)
    # the generator is bound once as the lambda's default argument, hence every call to
    # `get_next_eid()` pulls its value from that very same generator
    self.get_next_eid = lambda g=self._get_eid_gen(): next(g)
    self._dbh = PGHelper(cnx)

    # internal bookkeeping
    self._constraints_dropped = self.slave_mode
    self._initialized = set()
    self._data_relations = defaultdict(list)
    self._data_entities = defaultdict(list)
11326
06eeac9389a3
[dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11325
diff
changeset
|
98 |
|
06eeac9389a3
[dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11325
diff
changeset
|
99 |
def _get_eid_gen(self):
    """Endless generator of eids for the entities to insert.

    Eids are reserved `self.eids_seq_range` at a time through the system source's
    `create_eid`, then handed out one by one. Another range is reserved as soon as the
    current one is exhausted.
    """
    while True:
        # NOTE(review): the returned value is treated as the last eid of the range that has
        # just been reserved -- confirm against the system source's `create_eid`
        range_end = self._cnx.repo.system_source.create_eid(self._cnx, self.eids_seq_range)
        range_start = range_end - self.eids_seq_range + 1
        for eid in range(range_start, range_end + 1):
            yield eid
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
107 |
|
11781
4ebd968f364c
[massive store] Reintroduce methods that are necessary to properly handle master/slave configuration
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11780
diff
changeset
|
108 |
# master/slaves specific API |
4ebd968f364c
[massive store] Reintroduce methods that are necessary to properly handle master/slave configuration
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11780
diff
changeset
|
109 |
|
11784
c1aa50a88de3
[massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11783
diff
changeset
|
110 |
def master_init(self):
    """Initialize database for massive insertion.

    This is expected to be called once, by the master store in master/slaves configuration.

    It creates the `cwmassive_initialized` table (columns `retype`, `type` and `uuid`) and
    records this store in `self._initialized`, so that calling the method again on the same
    store does nothing.

    Raise `AssertionError` when called on a store in slave mode.
    """
    assert not self.slave_mode
    if self in self._initialized:
        return
    self.sql('CREATE TABLE cwmassive_initialized'
             '(retype text, type varchar(128), uuid varchar(32))')
    # `_initialized` is a set (see `__init__`), which has no `append` method: use `add`,
    # else an AttributeError is raised right after the table creation
    self._initialized.add(self)
c1aa50a88de3
[massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11783
diff
changeset
|
120 |
|
11781
4ebd968f364c
[massive store] Reintroduce methods that are necessary to properly handle master/slave configuration
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11780
diff
changeset
|
121 |
def master_init_etype(self, etype):
    """Prepare the database before inserting entities of type `etype`.

    Expected to be called once per entity type, usually by the master store in
    master/slaves configuration.

    Constraints and indexes of the metadata tables are dropped if that is still to be done,
    then those of the entity type's own table. Finally the entity type is recorded, along
    with this store's uuid, in the `cwmassive_initialized` table (created on the fly when
    missing).
    """
    self._drop_metadata_constraints_if_necessary()

    etype_table = 'cw_%s' % etype.lower()
    self._dbh.drop_constraints(etype_table)
    self._dbh.drop_indexes(etype_table)

    create_query = ('CREATE TABLE IF NOT EXISTS cwmassive_initialized'
                    '(retype text, type varchar(128), uuid varchar(32))')
    self.sql(create_query)

    record_query = "INSERT INTO cwmassive_initialized VALUES (%(e)s, 'etype', %(uuid)s)"
    record_args = {'e': etype, 'uuid': self.uuid}
    self.sql(record_query, record_args)
11781
4ebd968f364c
[massive store] Reintroduce methods that are necessary to properly handle master/slave configuration
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11780
diff
changeset
|
135 |
|
4ebd968f364c
[massive store] Reintroduce methods that are necessary to properly handle master/slave configuration
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11780
diff
changeset
|
136 |
def master_insert_etype_metadata(self, etype):
    """Massive insertion of meta data for entities of type `etype`, using SQL statements.

    In master/slaves configuration, you'll usually want to call it from the master once all
    slaves have finished (slaves won't call it automatically, so that's your
    responsibility).
    """
    # standard metadata relations, as given by the metadata generator
    for rtype, target_eid in self.metagen.base_etype_rels(etype).items():
        self._insert_meta_relation(etype, target_eid, '%s_relation' % rtype)

    # cw_source, is and is_instance_of relations (normally handled by the system source)
    self._insert_meta_relation(etype, self.metagen.source.eid, 'cw_source_relation')
    etype_schema = self.schema[etype]
    self._insert_meta_relation(etype, etype_schema.eid, 'is_relation')
    # an entity is an instance of each ancestor of its type and of its type itself
    for instance_of_schema in chain(etype_schema.ancestors(), [etype_schema]):
        self._insert_meta_relation(etype, instance_of_schema.eid, 'is_instance_of_relation')

    # last step: fill the entities table, skipping eids which are already there
    entities_query = ("INSERT INTO entities (eid, type, extid) "
                      "SELECT cw_eid, '%s', extid FROM cw_%s "
                      "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
                      % (etype, etype.lower()))
    self.sql(entities_query)
4ebd968f364c
[massive store] Reintroduce methods that are necessary to properly handle master/slave configuration
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11780
diff
changeset
|
157 |
|
11322
21316020eae3
[dataimport] move cwmassive_constraint temporary table handling to the PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11321
diff
changeset
|
158 |
# SQL utilities ######################################################### |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
159 |
|
11778
9847a097266e
[massive store] Rework constraint/index handling
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11777
diff
changeset
|
160 |
def _drop_metadata_constraints_if_necessary(self):
    """Drop constraints and indexes of the metadata tables unless already done.

    The work itself is delegated to `_drop_metadata_constraints`; the
    `_constraints_dropped` flag is then set so that later calls return immediately.
    """
    if self._constraints_dropped:
        return
    self._drop_metadata_constraints()
    self._constraints_dropped = True
5b535fe2f364
[massive store] Lazy removal of constraints and metadata indexes
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11774
diff
changeset
|
165 |
|
11778
9847a097266e
[massive store] Rework constraint/index handling
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11777
diff
changeset
|
166 |
def _drop_metadata_constraints(self):
    """Drop constraints and indexes for the metadata tables.

    The tables concerned are the relation tables of every non-final relation listed by
    the metadata generator, plus those of `is_instance_of`, `is` and `cw_source`. Indexes
    of the `entities` table are dropped as well.

    They will be recreated by the `finish` method.
    """
    relation_names = []
    for name in self.metagen.meta_relations:
        if self.schema.rschema(name).final:
            # final relations are skipped here
            continue
        relation_names.append(name)
    relation_names.extend(('is_instance_of', 'is', 'cw_source'))
    for name in relation_names:
        table = name + '_relation'
        self._dbh.drop_constraints(table)
        self._dbh.drop_indexes(table)
    # Constraints of the entities table are deliberately left alone: the only one is the
    # primary key's index on eid, and we want to keep it.
    self._dbh.drop_indexes('entities')
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
180 |
|
11026
ce9b3886955d
[dataimport] remove eids_seq_start attribute from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents:
11025
diff
changeset
|
181 |
def restart_eid_sequence(self, start_eid):
    """Restart the `entities_id_seq` sequence at `start_eid`, then commit the connection.

    The SQL statement is produced by the system source's database helper
    (`sql_restart_numrange`) and executed through `self.sql`.
    """
    dbhelper = self._cnx.repo.system_source.dbhelper
    restart_query = dbhelper.sql_restart_numrange('entities_id_seq', initial_value=start_eid)
    self.sql(restart_query)
    self._cnx.commit()
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
185 |
|
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
186 |
# store api ################################################################ |
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
187 |
|
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
188 |
def prepare_insert_entity(self, etype, **kwargs):
    """Buffer a new entity of type `etype` and return its eid.

    `kwargs` holds the entity's attributes and inlined relations. It may contain an
    explicit `eid`; otherwise one is obtained from `get_next_eid`. The resulting record is
    appended to `_data_entities[etype]`.
    """
    needs_init = not self.slave_mode and etype not in self._initialized
    if needs_init:
        self._initialized.add(etype)
        self.master_init_etype(etype)
    # base_etype_attrs is @cached: work on a copy so the shared result is never modified
    entity = copy(self.metagen.base_etype_attrs(etype))
    entity.update(kwargs)
    if 'eid' not in entity:
        # no eid given by the caller, get one from the store
        entity['eid'] = self.get_next_eid()
    # XXX default values could be set once for all in base entity
    defaults = self.default_values[etype]
    for key in defaults:
        if key not in entity:
            entity[key] = defaults[key]
    self.metagen.init_entity_attrs(etype, entity['eid'], entity)
    self._data_entities[etype].append(entity)
    return entity['eid']
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
209 |
|
10863
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
210 |
def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
    """Buffer a relation ``rtype`` between entities with eids ``eid_from`` and ``eid_to``.

    The pair is appended to `_data_relations[rtype]`. The first time a relation type is
    seen by this store, it is recorded in `cwmassive_initialized` and a store-specific
    temporary table (suffixed with the store's uuid) is created for it.

    Relation must not be inlined. Extra keyword arguments are accepted but not used.
    """
    first_use = rtype not in self._initialized
    if first_use:
        if not self.slave_mode:
            self.master_init()
        assert not self._cnx.vreg.schema.rschema(rtype).inlined
        self._initialized.add(rtype)
        tablename = '%s_relation' % rtype.lower()
        tmp_tablename = '%s_%s' % (tablename, self.uuid)
        registration_args = {'r': rtype, 'uuid': self.uuid}
        self.sql("INSERT INTO cwmassive_initialized VALUES (%(r)s, 'rtype', %(uuid)s)",
                 registration_args)
        self.sql('CREATE TABLE %s(eid_from integer, eid_to integer)' % tmp_tablename)
    pending = self._data_relations[rtype]
    pending.append(dict(eid_from=eid_from, eid_to=eid_to))
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
227 |
|
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
228 |
def flush(self):
    """Flush the buffered data: entities first, then relations."""
    for flush_step in (self.flush_entities, self.flush_relations):
        flush_step()
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
232 |
|
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
233 |
def commit(self):
    """Commit the database transaction.

    The `on_commit` hook is run first, then the parent class' `commit` is called.
    """
    self.on_commit()
    parent = super(MassiveObjectStore, self)
    parent.commit()
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
237 |
|
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
238 |
def finish(self):
    """Merge the temporary relation tables, restore indexes/constraints and clean up.

    For every relation type recorded in `cwmassive_initialized`, the rows of each
    store-specific temporary table are copied into the real relation table (skipping rows
    already present), then the temporary table and its record are removed. Indexes and
    constraints are restored afterwards, `cwmassive_initialized` is dropped and the
    transaction is committed.

    :raises RuntimeError: if the store is in slave mode.
    """
    if self.slave_mode:
        raise RuntimeError('Store cleanup is not allowed in slave mode')
    self.logger.info("Start cleaning")
    # Get all the initialized etypes/rtypes
    if self._dbh.table_exists('cwmassive_initialized'):
        cu = self.sql('SELECT retype, type, uuid FROM cwmassive_initialized')
        # relation type -> uuids of the stores that created a temporary table for it
        relations = defaultdict(list)
        for retype, _type, uuid in cu.fetchall():
            if _type == 'rtype':
                relations[retype].append(uuid)
        # get back relation data from the temporary tables
        for rtype, uuids in relations.items():
            tablename = '%s_relation' % rtype.lower()
            self._dbh.drop_constraints(tablename)
            self._dbh.drop_indexes(tablename)
            for uuid in uuids:
                tmp_tablename = '%s_%s' % (tablename, uuid)
                # XXX no index on the original relation table, EXISTS subquery may be sloooow
                self.sql('INSERT INTO %(table)s(eid_from, eid_to) SELECT DISTINCT '
                         'T.eid_from, T.eid_to FROM %(tmp_table)s AS T '
                         'WHERE NOT EXISTS (SELECT 1 FROM %(table)s AS TT WHERE '
                         'TT.eid_from=T.eid_from AND TT.eid_to=T.eid_to);'
                         % {'table': tablename, 'tmp_table': tmp_tablename})
                # Drop temporary relation table and record from cwmassive_initialized
                self.sql('DROP TABLE %(tmp_table)s' % {'tmp_table': tmp_tablename})
                # The record to delete is the one of the relation type being processed
                # (`rtype`), not whatever `retype` was left bound to by the fetch loop.
                self.sql('DELETE FROM cwmassive_initialized '
                         'WHERE retype = %(rtype)s AND uuid = %(uuid)s',
                         {'rtype': rtype, 'uuid': uuid})
    self._dbh.restore_indexes_and_constraints()
    # Delete the meta data table
    self.sql('DROP TABLE IF EXISTS cwmassive_initialized')
    self.commit()
8e1f6de61300
[dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
10861
diff
changeset
|
272 |
|
11313
682b15eb2dd2
[dataimport] flake8
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11312
diff
changeset
|
273 |
# FLUSH ################################################################# |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
274 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
275 |
def on_commit(self):
    """Invoke `on_commit_callback`, if one is set."""
    callback = self.on_commit_callback
    if callback:
        callback()
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
278 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
279 |
def on_rollback(self, exc, etype, data):
    """Handle a failure `exc` that occurred for `etype` with `data`.

    When `on_rollback_callback` is set, it is called with `(exc, etype, data)` and the
    connection is rolled back. Without a callback, `exc` is raised.
    """
    callback = self.on_rollback_callback
    if not callback:
        raise exc
    callback(exc, etype, data)
    self._cnx.rollback()
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
285 |
|
11707
2c4518fea26f
[massive store] Drop deprecated code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11705
diff
changeset
|
286 |
def flush_relations(self):
    """Flush the relations data.

    For each relation type with buffered rows, the rows are turned into a COPY FROM
    buffer and pushed into this store's temporary relation table; the buffer for that
    relation type is then reset to an empty list.

    :raises ValueError: if the COPY FROM buffer built for a relation type is empty.
    """
    columns = ('eid_from', 'eid_to')
    for rtype, rows in self._data_relations.items():
        if not rows:
            # Nothing buffered for this rtype during this flush round.
            continue
        buf = pgstore._create_copyfrom_buffer(rows, columns)
        if not buf:
            # The buffer is empty. This is probably due to error in _create_copyfrom_buffer
            raise ValueError
        # Push into the tmp table
        tablename = '%s_relation' % rtype.lower()
        tmp_tablename = '%s_%s' % (tablename, self.uuid)
        self._cnx.cnxset.cu.copy_from(buf, tmp_tablename, null='NULL', columns=columns)
        # Clear data cache
        self._data_relations[rtype] = []
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
303 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
304 |
def flush_entities(self):
    """Send all buffered entity rows to the database with COPY FROM.

    For every entity type that has pending rows in ``self._data_entities``:

    * collect the union of the keys used by the rows and pad each row with
      ``None`` for the keys it lacks (``_create_copyfrom_buffer`` indexes
      rows by column name, so a missing key would raise ``KeyError``);
    * build the copy buffer and push it into the ``cw_<etype>`` table, each
      column name being prefixed with ``cw_``;
    * empty the pending list for that type and, unless ``self.slave_mode``
      is set, call ``self.master_insert_etype_metadata(etype)``.

    An exception raised by ``copy_from`` is handed over to
    ``self.on_rollback(exc, etype, rows)``; processing then continues.

    Raises ``ValueError`` when the copy buffer comes back empty.
    """
    for etype, rows in self._data_entities.items():
        if not rows:
            # Nothing was queued for this entity type during this round.
            continue
        # XXX The column names could be inferred from the schema instead of
        # being rediscovered from the data on each flush.
        columns = set()
        for row in rows:
            columns.update(row)
        # Every row handed to the buffer builder must define every column.
        template = dict.fromkeys(columns)
        padded_rows = []
        for row in rows:
            padded = dict(template)
            padded.update(row)
            padded_rows.append(padded)
        buf = pgstore._create_copyfrom_buffer(padded_rows, columns)
        if not buf:
            # An empty buffer most likely means the buffer builder failed.
            raise ValueError('Error in buffer creation for etype %s' % etype)
        columns = ['cw_%s' % attr for attr in columns]
        cursor = self._cnx.cnxset.cu
        try:
            cursor.copy_from(buf, 'cw_%s' % etype.lower(), null='NULL', columns=columns)
        except Exception as exc:
            self.on_rollback(exc, etype, rows)
        # Reset the pending rows for this entity type.
        self._data_entities[etype] = []
        if not self.slave_mode:
            self.master_insert_etype_metadata(etype)
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
337 |
|
11325
a29443fbd1f2
[dataimport] rename massive store's metagen_push_relation method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11324
diff
changeset
|
338 |
def _insert_meta_relation(self, etype, eid_to, rtype): |
11312
3a83759854ee
[dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11311
diff
changeset
|
339 |
self.sql("INSERT INTO %s (eid_from, eid_to) SELECT cw_eid, %s FROM cw_%s " |
3a83759854ee
[dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11311
diff
changeset
|
340 |
"WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)" |
3a83759854ee
[dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11311
diff
changeset
|
341 |
% (rtype, eid_to, etype.lower())) |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
342 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
343 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
344 |
def get_default_values(schema):
    """Analyze the yams ``schema`` and return the attributes' default values.

    The returned value is a dictionary mapping each entity type name to a
    sub-dictionary mapping attribute names to their default value. Every
    entity type of the schema gets an entry (possibly an empty one), but
    only attributes whose default is not ``None`` are listed.
    """
    default_values = {}
    for eschema in schema.entities():
        etype_defaults = default_values[eschema.type] = {}
        for rschema, _ in eschema.attribute_definitions():
            # Look the default up once only: it is both tested and stored.
            default = eschema.default(rschema.type)
            if default is not None:
                etype_defaults[rschema.type] = default
    return default_values
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
361 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
362 |
|
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
363 |
class PGHelper(object): |
11314
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
364 |
"""This class provides some helper methods to manipulate a postgres database metadata (index and |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
365 |
constraints). |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
366 |
""" |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
367 |
|
11310
e0b7277e5394
[dataimport] PGHelper should be responsible to retrieve the database schema
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11308
diff
changeset
|
368 |
def __init__(self, cnx): |
11314
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
369 |
self.sql = cnx.system_sql |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
370 |
# Deals with pg schema, see #3216686 |
11310
e0b7277e5394
[dataimport] PGHelper should be responsible to retrieve the database schema
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11308
diff
changeset
|
371 |
pg_schema = cnx.repo.config.system_source_config.get('db-namespace') or 'public' |
10853
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
372 |
self.pg_schema = pg_schema |
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
373 |
|
11322
21316020eae3
[dataimport] move cwmassive_constraint temporary table handling to the PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11321
diff
changeset
|
374 |
def drop_indexes(self, tablename):
    """Drop the indexes of ``tablename``, saving their SQL for later restore.

    The statement recreating each index (as given by ``table_indexes``) is
    first recorded in the ``cwmassive_constraints`` table, created on
    demand, so that indexes can be restored even after a crash.
    """
    self.sql('CREATE TABLE IF NOT EXISTS cwmassive_constraints(sql TEXT, insert_order SERIAL)')
    for index_name, create_sql in self.table_indexes(tablename).items():
        # Save the definition before dropping the index.
        self.sql('INSERT INTO cwmassive_constraints(sql) VALUES (%(sql)s)', {'sql': create_sql})
        self.sql('DROP INDEX %s' % index_name)
21316020eae3
[dataimport] move cwmassive_constraint temporary table handling to the PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11321
diff
changeset
|
382 |
|
21316020eae3
[dataimport] move cwmassive_constraint temporary table handling to the PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11321
diff
changeset
|
383 |
def drop_constraints(self, tablename):
    """Drop the constraints of ``tablename``, saving their SQL for later restore.

    The statement recreating each constraint (as given by
    ``table_constraints``) is first recorded in the ``cwmassive_constraints``
    table, which is created on demand.
    """
    self.sql('CREATE TABLE IF NOT EXISTS cwmassive_constraints(sql TEXT, insert_order SERIAL)')
    for constraint_name, create_sql in self.table_constraints(tablename).items():
        # Save the definition before dropping the constraint.
        self.sql('INSERT INTO cwmassive_constraints(sql) VALUES (%(sql)s)', {'sql': create_sql})
        self.sql('ALTER TABLE %s DROP CONSTRAINT %s' % (tablename, constraint_name))
21316020eae3
[dataimport] move cwmassive_constraint temporary table handling to the PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11321
diff
changeset
|
389 |
|
21316020eae3
[dataimport] move cwmassive_constraint temporary table handling to the PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11321
diff
changeset
|
390 |
def restore_indexes_and_constraints(self):
    """Replay the statements saved in ``cwmassive_constraints``, then drop it.

    Does nothing when the table does not exist. Saved statements are run
    from the most recently inserted to the oldest, and each one is removed
    from the table right after it has been executed.
    """
    if self.table_exists('cwmassive_constraints'):
        select_query = ('SELECT sql, insert_order FROM cwmassive_constraints '
                        'ORDER BY insert_order DESC')
        delete_query = 'DELETE FROM cwmassive_constraints WHERE insert_order=%(order)s'
        saved = self.sql(select_query).fetchall()
        for statement, insert_order in saved:
            self.sql(statement)
            # Forget the statement as soon as it has been replayed.
            self.sql(delete_query, {'order': insert_order})
        self.sql('DROP TABLE cwmassive_constraints')
21316020eae3
[dataimport] move cwmassive_constraint temporary table handling to the PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11321
diff
changeset
|
401 |
|
11314
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
402 |
def table_exists(self, tablename):
    """Tell whether ``tablename`` exists in the ``self.pg_schema`` schema.

    The lookup is done against ``information_schema.tables``.
    """
    query = ('SELECT 1 from information_schema.tables '
             'WHERE table_name=%(t)s AND table_schema=%(s)s')
    args = {'t': tablename, 's': self.pg_schema}
    return bool(self.sql(query, args).fetchone())
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
408 |
|
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
409 |
def table_indexes_constraints(self, tablename):
    """Return an ``(indexes, constraints)`` pair of dictionaries for the table.

    Both map a name to the SQL recreating the object. Indexes bearing the
    name of one of the table's constraints are left out of the first
    dictionary, since those are created along with the constraint (specific
    case of primary keys, see #3224079).
    """
    all_indexes = self.table_indexes(tablename)
    constraints = self.table_constraints(tablename)
    standalone_indexes = {name: sql for name, sql in all_indexes.items()
                          if name not in constraints}
    return standalone_indexes, constraints
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
422 |
|
11314
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
423 |
def table_indexes(self, tablename):
    """Return a ``{index name: index sql}`` dictionary for the given table.

    Names come from ``_index_names`` and the SQL recreating each index from
    ``_index_sql``.
    """
    return {index_name: self._index_sql(index_name)
            for index_name in self._index_names(tablename)}
de741492538d
[dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff
changeset
|
429 |
|
11314
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
430 |
def table_constraints(self, tablename):
    """Return a ``{constraint name: constraint sql}`` dictionary for the table.

    Each value is a complete ``ALTER TABLE ... ADD CONSTRAINT ...`` statement
    built from the definition given by ``_constraint_sql``.
    """
    statement = 'ALTER TABLE %s ADD CONSTRAINT %s %s'
    return {constraint_name: statement % (tablename, constraint_name,
                                          self._constraint_sql(constraint_name))
            for constraint_name in self._constraint_names(tablename)}
11314
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
437 |
|
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
438 |
def _index_names(self, tablename): |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
439 |
"""Return the names of all indexes in the given table (including constraints.)""" |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
440 |
cu = self.sql("SELECT c.relname FROM pg_catalog.pg_class c " |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
441 |
"JOIN pg_catalog.pg_index i ON i.indexrelid = c.oid " |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
442 |
"JOIN pg_catalog.pg_class c2 ON i.indrelid = c2.oid " |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
443 |
"LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner " |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
444 |
"LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace " |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
445 |
"WHERE c.relkind IN ('i','') " |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
446 |
" AND c2.relname = %(t)s " |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
447 |
" AND i.indisprimary = FALSE " |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
448 |
" AND n.nspname NOT IN ('pg_catalog', 'pg_toast') " |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
449 |
" AND pg_catalog.pg_table_is_visible(c.oid);", {'t': tablename}) |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
450 |
return [name for name, in cu.fetchall()] |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
451 |
|
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
452 |
def _constraint_names(self, tablename): |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
453 |
"""Return the names of all constraints in the given table.""" |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
454 |
cu = self.sql("SELECT i.conname FROM pg_catalog.pg_class c " |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
455 |
"JOIN pg_catalog.pg_constraint i ON i.conrelid = c.oid " |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
456 |
"JOIN pg_catalog.pg_class c2 ON i.conrelid=c2.oid " |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
457 |
"LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner " |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
458 |
"LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace " |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
459 |
"WHERE c2.relname = %(t)s " |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
460 |
"AND n.nspname NOT IN ('pg_catalog', 'pg_toast') " |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
461 |
"AND pg_catalog.pg_table_is_visible(c.oid)", {'t': tablename}) |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
462 |
return [name for name, in cu.fetchall()] |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
463 |
|
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
464 |
def _index_sql(self, name): |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
465 |
"""Return the SQL to be used to recreate the index of the given name.""" |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
466 |
return self.sql('SELECT pg_get_indexdef(c.oid) FROM pg_catalog.pg_class c ' |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
467 |
'LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace ' |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
468 |
'WHERE c.relname = %(r)s AND n.nspname=%(n)s', |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
469 |
{'r': name, 'n': self.pg_schema}).fetchone()[0] |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
470 |
|
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
471 |
def _constraint_sql(self, name): |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
472 |
"""Return the SQL to be used to recreate the constraint.""" |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
473 |
return self.sql('SELECT pg_get_constraintdef(c.oid) FROM pg_catalog.pg_constraint c ' |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
474 |
'LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.connamespace ' |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
475 |
'WHERE c.conname = %(r)s AND n.nspname=%(n)s', |
c258bd6b20d8
[dataimport] rework PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
11313
diff
changeset
|
476 |
{'r': name, 'n': self.pg_schema}).fetchone()[0] |