cubicweb/dataimport/massive_store.py
author Philippe Pepiot <ph@itsalwaysdns.eu>
Tue, 31 Mar 2020 19:15:03 +0200
changeset 12957 0c973204033a
parent 12567 26744ad37953
permissions -rw-r--r--
[server] prevent returning a closed cursor to the database pool. Since c8c6ad8, init_repository uses repo.internal_cnx() instead of repo.system_source.get_connection(), so it goes through the pool, and we should not close a cursor obtained from the pool before returning it. Otherwise we may get a "connection already closed" error. This bug only triggers when connection-pool-size = 1. Since we are moving to a dynamic pooler, we need to get this fixed. This does not occur with sqlite, since its connection wrapper instantiates a new cursor every time, but it does occur with other databases.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     1
# coding: utf-8
11305
118d83e65ca8 [dataimport] remove useless assignment in massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11057
diff changeset
     2
# copyright 2015-2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     3
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     4
#
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     5
# This file is part of CubicWeb.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     6
#
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     7
# CubicWeb is free software: you can redistribute it and/or modify it under the
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     8
# terms of the GNU Lesser General Public License as published by the Free
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     9
# Software Foundation, either version 2.1 of the License, or (at your option)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    10
# any later version.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    11
#
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    12
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT ANY
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    13
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    14
# A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    15
# details.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    16
#
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    17
# You should have received a copy of the GNU Lesser General Public License along
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    18
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    19
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    20
from collections import defaultdict
11316
36c7cd362fc7 [dataimport] add a .schema shortcut attribute on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11315
diff changeset
    21
from itertools import chain
11784
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
    22
import logging
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
    23
from uuid import uuid4
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    24
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    25
from cubicweb.dataimport import stores, pgstore
11789
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
    26
from cubicweb.server.schema2sql import eschema_sql_def
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    27
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    28
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    29
class MassiveObjectStore(stores.RQLObjectStore):
11331
f2ff82dfcd5c [dataimport] add a bit of extra-documentation on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11330
diff changeset
    30
    """Store for massive import of data, with delayed insertion of meta data.
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    31
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    32
    WARNINGS:
11315
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
    33
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
    34
    - This store may only be used with PostgreSQL for now, as it relies
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
    35
      on the COPY FROM method, and on specific PostgreSQL tables to get all
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
    36
      the indexes.
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    37
11315
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
    38
    - This store can only insert relations that are not inlined (i.e.,
11707
2c4518fea26f [massive store] Drop deprecated code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11705
diff changeset
    39
      which do *not* have inlined=True in their definition in the schema),
2c4518fea26f [massive store] Drop deprecated code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11705
diff changeset
    40
      unless they are specified as entity attributes.
11315
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
    41
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
    42
    It should be used as follows:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    43
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    44
       store = MassiveObjectStore(cnx)
11707
2c4518fea26f [massive store] Drop deprecated code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11705
diff changeset
    45
       eid_p = store.prepare_insert_entity('Person',
11783
8865c9e55575 [massive store] docstring and __init__ cleanup
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11782
diff changeset
    46
                                           cwuri=u'http://dbpedia.org/toto',
8865c9e55575 [massive store] docstring and __init__ cleanup
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11782
diff changeset
    47
                                           name=u'Toto')
11707
2c4518fea26f [massive store] Drop deprecated code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11705
diff changeset
    48
       eid_loc = store.prepare_insert_entity('Location',
11783
8865c9e55575 [massive store] docstring and __init__ cleanup
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11782
diff changeset
    49
                                             cwuri=u'http://geonames.org/11111',
8865c9e55575 [massive store] docstring and __init__ cleanup
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11782
diff changeset
    50
                                             name=u'Somewhere')
11707
2c4518fea26f [massive store] Drop deprecated code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11705
diff changeset
    51
       store.prepare_insert_relation(eid_p, 'lives_in', eid_loc)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    52
       store.flush()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    53
       ...
10882
634cc266e48f [dataimport] fix doc string to stop referring to the old API
Julien Cristau <julien.cristau@logilab.fr>
parents: 10881
diff changeset
    54
       store.commit()
634cc266e48f [dataimport] fix doc string to stop referring to the old API
Julien Cristau <julien.cristau@logilab.fr>
parents: 10881
diff changeset
    55
       store.finish()
11331
f2ff82dfcd5c [dataimport] add a bit of extra-documentation on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11330
diff changeset
    56
f2ff82dfcd5c [dataimport] add a bit of extra-documentation on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11330
diff changeset
    57
    Full-text indexing is not handled; you will have to reindex the relevant entity types yourself
f2ff82dfcd5c [dataimport] add a bit of extra-documentation on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11330
diff changeset
    58
    if desired.
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    59
    """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    60
11792
f1911a4638af [massive store] remove on_commit / on_rollback parameters
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 11791
diff changeset
    61
    def __init__(self, cnx, slave_mode=False, eids_seq_range=10000, metagen=None):
        """Set up a massive store bound to the repository connection `cnx`.

        Arguments:

        - `cnx`: connection to the repository
        - `slave_mode`: when true, the store never calls `master_init` on its own
          (presumably because a master store is expected to have done it -- to confirm)
        - `eids_seq_range`: number of eids reserved at once each time the store runs out of eids
        - `metagen`: optional :class:`MetadataGenerator` instance; a default one is built from
          `cnx` when omitted
        """
        super().__init__(cnx)

        # dash-less hexadecimal identifier, used to name this store's temporary tables
        self.uuid = uuid4().hex
        self.slave_mode = slave_mode
        self.metagen = stores.MetadataGenerator(cnx) if metagen is None else metagen

        self.logger = logging.getLogger('dataimport.massive_store')
        self.sql = cnx.system_sql
        self.schema = cnx.vreg.schema
        self.default_values = get_default_values(self.schema)
        # the generator is bound once as a default argument, so each call draws the next eid
        eid_generator = self._get_eid_gen(eids_seq_range)
        self.get_next_eid = lambda g=eid_generator: next(g)
        self._source_dbhelper = cnx.repo.system_source.dbhelper
        self._dbh = PGHelper(cnx)

        self._data_entities = defaultdict(list)
        self._data_relations = defaultdict(list)
        self._initialized = {}
11326
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
    87
11871
5f71460236a4 [massive store] Don't store eids_seq_range as a store attribute
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11802
diff changeset
    88
    def _get_eid_gen(self, eids_seq_range):
11326
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
    89
        """ Function getting the next eid. This is done by preselecting
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
    90
        a given number of eids from the 'entities_id_seq', and then
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
    91
        storing them"""
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
    92
        while True:
11871
5f71460236a4 [massive store] Don't store eids_seq_range as a store attribute
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11802
diff changeset
    93
            last_eid = self._cnx.repo.system_source.create_eid(self._cnx, eids_seq_range)
5f71460236a4 [massive store] Don't store eids_seq_range as a store attribute
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11802
diff changeset
    94
            for eid in range(last_eid - eids_seq_range + 1, last_eid + 1):
11326
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
    95
                yield eid
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    96
11781
4ebd968f364c [massive store] Reintroduce methods that are necessary to properly handle master/slave configuration
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11780
diff changeset
    97
    # master/slaves specific API
4ebd968f364c [massive store] Reintroduce methods that are necessary to properly handle master/slave configuration
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11780
diff changeset
    98
11791
20555214576b [massive store] master_init should commit by default
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 11790
diff changeset
    99
    def master_init(self, commit=True):
11784
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   100
        """Initialize database for massive insertion.
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   101
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   102
        This is expected to be called once, by the master store in master/slaves configuration.
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   103
        """
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   104
        assert not self.slave_mode
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   105
        if self not in self._initialized:
12195
81cf4cf60411 [massive store] ensure the cwmassive_initialized table is deleted if needed
David Douard <david.douard@logilab.fr>
parents: 11871
diff changeset
   106
            self.sql('DROP TABLE IF EXISTS cwmassive_initialized')
11784
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   107
            self.sql('CREATE TABLE cwmassive_initialized'
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   108
                     '(retype text, type varchar(128), uuid varchar(32))')
11789
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   109
            self._initialized[self] = None
11791
20555214576b [massive store] master_init should commit by default
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 11790
diff changeset
   110
            if commit:
20555214576b [massive store] master_init should commit by default
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 11790
diff changeset
   111
                self.commit()
11781
4ebd968f364c [massive store] Reintroduce methods that are necessary to properly handle master/slave configuration
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11780
diff changeset
   112
11322
21316020eae3 [dataimport] move cwmassive_constraint temporary table handling to the PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11321
diff changeset
   113
    # SQL utilities #########################################################
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   114
11778
9847a097266e [massive store] Rework constraint/index handling
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11777
diff changeset
   115
    def _drop_metadata_constraints(self):
9847a097266e [massive store] Rework constraint/index handling
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11777
diff changeset
   116
        """Drop constraints and indexes for the metadata tables.
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   117
11778
9847a097266e [massive store] Rework constraint/index handling
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11777
diff changeset
   118
        They will be recreated by the `finish` method.
9847a097266e [massive store] Rework constraint/index handling
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11777
diff changeset
   119
        """
11780
307d96c0ab5a [massive store] Follow configuration of the metadata generator
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11778
diff changeset
   120
        rtypes = [rtype for rtype in self.metagen.meta_relations
307d96c0ab5a [massive store] Follow configuration of the metadata generator
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11778
diff changeset
   121
                  if not self.schema.rschema(rtype).final]
307d96c0ab5a [massive store] Follow configuration of the metadata generator
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11778
diff changeset
   122
        rtypes += ('is_instance_of', 'is', 'cw_source')
307d96c0ab5a [massive store] Follow configuration of the metadata generator
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11778
diff changeset
   123
        for rtype in rtypes:
307d96c0ab5a [massive store] Follow configuration of the metadata generator
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11778
diff changeset
   124
            self._dbh.drop_constraints(rtype + '_relation')
307d96c0ab5a [massive store] Follow configuration of the metadata generator
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11778
diff changeset
   125
            self._dbh.drop_indexes(rtype + '_relation')
11778
9847a097266e [massive store] Rework constraint/index handling
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11777
diff changeset
   126
        # don't drop constraints for the entities table, the only one is the primary key's index on
9847a097266e [massive store] Rework constraint/index handling
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11777
diff changeset
   127
        # eid and we want to keep it
9847a097266e [massive store] Rework constraint/index handling
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11777
diff changeset
   128
        self._dbh.drop_indexes('entities')
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   129
11026
ce9b3886955d [dataimport] remove eids_seq_start attribute from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 11025
diff changeset
   130
    def restart_eid_sequence(self, start_eid):
11323
e9120da559f5 [dataimport] use sql shortcut
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11322
diff changeset
   131
        self.sql(self._cnx.repo.system_source.dbhelper.sql_restart_numrange(
11026
ce9b3886955d [dataimport] remove eids_seq_start attribute from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 11025
diff changeset
   132
            'entities_id_seq', initial_value=start_eid))
ce9b3886955d [dataimport] remove eids_seq_start attribute from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 11025
diff changeset
   133
        self._cnx.commit()
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   134
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   135
    # store api ################################################################
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   136
11785
0cea67f41d0c [massive store] Delay metadata attributes and default values handling to flush_entities
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11784
diff changeset
   137
    def prepare_insert_entity(self, etype, **data):
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   138
        """Given an entity type, attributes and inlined relations, returns the inserted entity's
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   139
        eid.
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   140
        """
11789
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   141
        if etype not in self._initialized:
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   142
            if not self.slave_mode:
11791
20555214576b [massive store] master_init should commit by default
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 11790
diff changeset
   143
                self.master_init(commit=False)
11789
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   144
            tablename = 'cw_%s' % etype.lower()
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   145
            tmp_tablename = '%s_%s' % (tablename, self.uuid)
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   146
            self.sql("INSERT INTO cwmassive_initialized VALUES (%(e)s, 'etype', %(uuid)s)",
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   147
                     {'e': etype, 'uuid': self.uuid})
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   148
            attr_defs = eschema_sql_def(self._source_dbhelper, self.schema[etype])
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   149
            self.sql('CREATE TABLE %s(%s);' % (tmp_tablename,
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   150
                                               ', '.join('cw_%s %s' % (column, sqltype)
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   151
                                                         for column, sqltype in attr_defs)))
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   152
            self._initialized[etype] = [attr for attr, _ in attr_defs]
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   153
11326
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   154
        if 'eid' not in data:
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   155
            # If eid is not given and the eids sequence is set, use the value from the sequence
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   156
            eid = self.get_next_eid()
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   157
            data['eid'] = eid
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   158
        self._data_entities[etype].append(data)
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   159
        return data['eid']
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   160
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   161
    def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
        """Queue a ``rtype`` relation going from ``eid_from`` to ``eid_to``.

        The first time this store sees ``rtype``, the relation type is recorded in
        ``cwmassive_initialized`` and a store-specific temporary table is created for it (after
        running ``master_init`` when the store is not in slave mode). The relation itself is only
        appended to the in-memory buffer here.

        The relation must not be inlined. Extra keyword arguments are accepted but not used.
        """
        first_use = rtype not in self._initialized
        if first_use:
            if not self.slave_mode:
                self.master_init(commit=False)
            rschema = self._cnx.vreg.schema.rschema(rtype)
            assert not rschema.inlined
            # mark the rtype as known by this store before touching the database
            self._initialized[rtype] = None
            relation_table = '%s_relation' % rtype.lower()
            store_table = '%s_%s' % (relation_table, self.uuid)
            registration_args = {'r': rtype, 'uuid': self.uuid}
            self.sql("INSERT INTO cwmassive_initialized VALUES (%(r)s, 'rtype', %(uuid)s)",
                     registration_args)
            self.sql('CREATE TABLE %s(eid_from integer, eid_to integer)' % store_table)
        record = {'eid_from': eid_from, 'eid_to': eid_to}
        self._data_relations[rtype].append(record)
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   178
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   179
    def flush(self):
        """Write out all buffered data: entities first, then relations."""
        self.flush_entities()
        self.flush_relations()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   183
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   184
    def finish(self):
        """Run the final cleanup (see ``_finish``) and commit it.

        If the cleanup or its commit raises, the connection is rolled back and the exception is
        propagated. In every case the ``cwmassive_initialized`` bookkeeping table is dropped
        (if it exists) and that drop is committed.
        """
        cnx = self._cnx
        try:
            self._finish()
            cnx.commit()
        except Exception:
            # undo whatever the partial cleanup did before letting the error escape
            cnx.rollback()
            raise
        finally:
            # the metadata table must go away whether or not the cleanup succeeded
            self.sql('DROP TABLE IF EXISTS cwmassive_initialized')
            self.commit()
970c32a4c7b7 [massive store] Ensure temporary metadata table get dropped
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11871
diff changeset
   196
970c32a4c7b7 [massive store] Ensure temporary metadata table get dropped
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11871
diff changeset
   197
    def _finish(self):
970c32a4c7b7 [massive store] Ensure temporary metadata table get dropped
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11871
diff changeset
   198
        """Remove temporary tables and columns."""
11786
f5d26d3648d4 [massive store] Turn a runtime error into an assertion
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11785
diff changeset
   199
        assert not self.slave_mode, 'finish method should only be called by the master store'
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   200
        self.logger.info("Start cleaning")
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   201
        # Get all the initialized etypes/rtypes
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   202
        if self._dbh.table_exists('cwmassive_initialized'):
11784
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   203
            cu = self.sql('SELECT retype, type, uuid FROM cwmassive_initialized')
11789
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   204
            entities = defaultdict(list)
11784
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   205
            relations = defaultdict(list)
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   206
            for retype, _type, uuid in cu.fetchall():
11774
51c160677afe [repository] Drop the entities.extid column and associated cache
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11773
diff changeset
   207
                if _type == 'rtype':
11784
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   208
                    relations[retype].append(uuid)
11789
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   209
                else:  # _type = 'etype'
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   210
                    entities[retype].append(uuid)
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   211
            # if there is some entities to insert, delete constraint on metadata tables once for all
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   212
            if entities:
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   213
                self._drop_metadata_constraints()
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   214
            # get back entity data from the temporary tables
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   215
            for etype, uuids in entities.items():
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   216
                tablename = 'cw_%s' % etype.lower()
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   217
                attr_defs = eschema_sql_def(self._source_dbhelper, self.schema[etype])
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   218
                columns = ','.join('cw_%s' % attr for attr, _ in attr_defs)
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   219
                self._dbh.drop_constraints(tablename)
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   220
                self._dbh.drop_indexes(tablename)
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   221
                for uuid in uuids:
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   222
                    tmp_tablename = '%s_%s' % (tablename, uuid)
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   223
                    self.sql('INSERT INTO %(table)s(%(columns)s) '
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   224
                             'SELECT %(columns)s FROM %(tmp_table)s'
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   225
                             % {'table': tablename, 'tmp_table': tmp_tablename,
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   226
                                'columns': columns})
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   227
                    self._insert_etype_metadata(etype, tmp_tablename)
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   228
                    self._tmp_data_cleanup(tmp_tablename, etype, uuid)
11784
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   229
            # get back relation data from the temporary tables
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   230
            for rtype, uuids in relations.items():
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   231
                tablename = '%s_relation' % rtype.lower()
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   232
                self._dbh.drop_constraints(tablename)
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   233
                self._dbh.drop_indexes(tablename)
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   234
                for uuid in uuids:
c1aa50a88de3 [massive store] Use a slave specific table for relation insertion in the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11783
diff changeset
   235
                    tmp_tablename = '%s_%s' % (tablename, uuid)
11790
04607da552ac [massive store] isolate SQL statements that one may want to customize
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 11789
diff changeset
   236
                    self.fill_relation_table(tablename, tmp_tablename)
11789
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   237
                    self._tmp_data_cleanup(tmp_tablename, rtype, uuid)
11787
05b3e44c135f [massive store] Docstring / comment cleanups
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11786
diff changeset
   238
        # restore all deleted indexes and constraints
11322
21316020eae3 [dataimport] move cwmassive_constraint temporary table handling to the PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11321
diff changeset
   239
        self._dbh.restore_indexes_and_constraints()
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   240
11789
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   241
    def _insert_etype_metadata(self, etype, tmp_tablename):
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   242
        """Massive insertion of meta data for `etype`, with new entities in `tmp_tablename`.
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   243
        """
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   244
        # insert standard metadata relations
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   245
        for rtype, eid in self.metagen.base_etype_rels(etype).items():
11790
04607da552ac [massive store] isolate SQL statements that one may want to customize
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 11789
diff changeset
   246
            self.fill_meta_relation_table(tmp_tablename, rtype, eid)
11789
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   247
        # insert cw_source, is and is_instance_of relations (normally handled by the system source)
11790
04607da552ac [massive store] isolate SQL statements that one may want to customize
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 11789
diff changeset
   248
        self.fill_meta_relation_table(tmp_tablename, 'cw_source', self.metagen.source.eid)
11789
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   249
        eschema = self.schema[etype]
11790
04607da552ac [massive store] isolate SQL statements that one may want to customize
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 11789
diff changeset
   250
        self.fill_meta_relation_table(tmp_tablename, 'is', eschema.eid)
11789
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   251
        for parent_eschema in chain(eschema.ancestors(), [eschema]):
11790
04607da552ac [massive store] isolate SQL statements that one may want to customize
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 11789
diff changeset
   252
            self.fill_meta_relation_table(tmp_tablename, 'is_instance_of', parent_eschema.eid)
04607da552ac [massive store] isolate SQL statements that one may want to customize
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 11789
diff changeset
   253
        self.fill_entities_table(etype, tmp_tablename)
04607da552ac [massive store] isolate SQL statements that one may want to customize
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 11789
diff changeset
   254
04607da552ac [massive store] isolate SQL statements that one may want to customize
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 11789
diff changeset
   255
    def fill_entities_table(self, etype, tmp_tablename):
        """Record in `entities` the eids found in `tmp_tablename`, with type `etype`.

        Eids which are already present in the `entities` table are skipped.
        """
        query = ("INSERT INTO entities(eid, type) "
                 "SELECT cw_eid, '%s' FROM %s "
                 "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)")
        self.sql(query % (etype, tmp_tablename))
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   261
11790
04607da552ac [massive store] isolate SQL statements that one may want to customize
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 11789
diff changeset
   262
    def fill_relation_table(self, tablename, tmp_tablename):
        """Copy the distinct relations of `tmp_tablename` into `tablename`.

        (eid_from, eid_to) pairs which already exist in `tablename` are not inserted again.
        """
        # XXX no index on the original relation table, EXISTS subquery may be sloooow
        names = {'table': tablename, 'tmp_table': tmp_tablename}
        query = ('INSERT INTO %(table)s(eid_from, eid_to) SELECT DISTINCT '
                 'T.eid_from, T.eid_to FROM %(tmp_table)s AS T '
                 'WHERE NOT EXISTS (SELECT 1 FROM %(table)s AS TT WHERE '
                 'TT.eid_from=T.eid_from AND TT.eid_to=T.eid_to);')
        self.sql(query % names)
04607da552ac [massive store] isolate SQL statements that one may want to customize
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 11789
diff changeset
   269
04607da552ac [massive store] isolate SQL statements that one may want to customize
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents: 11789
diff changeset
   270
    def fill_meta_relation_table(self, tmp_tablename, rtype, eid_to):
        """Link every new entity of `tmp_tablename` to `eid_to` through relation `rtype`.

        Only rows whose eid is not yet in the `entities` table are considered.
        """
        query = ("INSERT INTO %s_relation(eid_from, eid_to) SELECT cw_eid, %s FROM %s "
                 "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)")
        self.sql(query % (rtype, eid_to, tmp_tablename))
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   274
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   275
    def _tmp_data_cleanup(self, tmp_tablename, ertype, uuid):
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   276
        """Drop temporary relation table and record from cwmassive_initialized."""
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   277
        self.sql('DROP TABLE %(tmp_table)s' % {'tmp_table': tmp_tablename})
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   278
        self.sql('DELETE FROM cwmassive_initialized '
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   279
                 'WHERE retype = %(rtype)s AND uuid = %(uuid)s',
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   280
                 {'rtype': ertype, 'uuid': uuid})
71df2811b422 [massive store] Store entities in temporary table as well
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11788
diff changeset
   281
11313
682b15eb2dd2 [dataimport] flake8
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11312
diff changeset
   282
    # FLUSH #################################################################
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   283
11707
2c4518fea26f [massive store] Drop deprecated code
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11705
diff changeset
   284
    def flush_relations(self):
        """Copy buffered relations to this store's temporary relation tables.

        Relation types whose buffer is empty are skipped; flushed buffers are reset to an
        empty list.
        """
        copied_columns = ('eid_from', 'eid_to')
        for rtype, pending in self._data_relations.items():
            if not pending:
                # nothing was queued for this rtype since the last flush
                continue
            copy_buffer = pgstore._create_copyfrom_buffer(pending, ('eid_from', 'eid_to'))
            cursor = self._cnx.cnxset.cu
            relation_table = '%s_relation' % rtype.lower()
            store_table = '%s_%s' % (relation_table, self.uuid)
            cursor.copy_from(copy_buffer, store_table, null='NULL', columns=copied_columns)
            # rebinding an existing key is safe while iterating over the dict
            self._data_relations[rtype] = []
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   297
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   298
    def flush_entities(self):
        """Copy buffered entities to this store's temporary entity tables.

        Each buffered entity is completed with, by increasing priority: ``None`` for every
        attribute of the type, the type's default values, the metadata generator's base
        attributes, and finally the entity's own values (after the metadata generator
        initialized them). Entity types whose buffer is empty are skipped; flushed buffers are
        reset to an empty list.
        """
        metagen = self.metagen
        for etype, pending in self._data_entities.items():
            if not pending:
                # nothing was queued for this etype since the last flush
                continue
            attrs = self._initialized[etype]
            # values shared by every entity of this type
            template = dict.fromkeys(attrs)
            template.update(self.default_values[etype])
            template.update(metagen.base_etype_attrs(etype))
            rows = []
            for entity in pending:
                # initialize `entity` itself first: keys set to None by `template` would
                # otherwise not be filled
                metagen.init_entity_attrs(etype, entity['eid'], entity)
                # XXX warn/raise if there is some key not in attrs?
                row = template.copy()
                row.update(entity)
                rows.append(row)
            copy_buffer = pgstore._create_copyfrom_buffer(rows, attrs)
            entity_table = 'cw_%s' % etype.lower()
            store_table = '%s_%s' % (entity_table, self.uuid)
            column_names = ['cw_%s' % attr for attr in attrs]
            cursor = self._cnx.cnxset.cu
            cursor.copy_from(copy_buffer, store_table, null='NULL', columns=column_names)
            # rebinding an existing key is safe while iterating over the dict
            self._data_entities[etype] = []
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   326
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   327
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   328
def get_default_values(schema):
    """Analyze the yams ``schema`` and return the attribute default values.

    The returned value is a dictionary mapping each entity type to a
    sub-dictionary mapping attribute names -> default values.

    Every entity type yielded by ``schema.entities()`` gets an entry, possibly
    empty; attributes whose default is ``None`` are left out.
    """
    default_values = {}
    # iterate on all entity types
    for eschema in schema.entities():
        etype_defaults = default_values[eschema.type] = {}
        # for each entity type, iterate on its attribute definitions
        for rschema, _ in eschema.attribute_definitions():
            # look the default up only once per attribute
            default = eschema.default(rschema.type)
            if default is not None:
                etype_defaults[rschema.type] = default
    return default_values
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   345
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   346
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   347
class PGHelper(object):
    """This class provides some helper methods to manipulate a postgres database metadata (index and
    constraints).

    Dropped indexes and constraints are saved as SQL statements in the ``cwmassive_constraints``
    table, from which `restore_indexes_and_constraints` replays them.
    """

    def __init__(self, cnx):
        """Bind the helper to ``cnx``.

        SQL is executed through ``cnx.system_sql``. The postgres schema is read from the
        'db-namespace' entry of the system source configuration and defaults to 'public'.
        """
        self.sql = cnx.system_sql
        # Deals with pg schema, see #3216686
        self.pg_schema = cnx.repo.config.system_source_config.get('db-namespace') or 'public'

    def _ensure_constraints_table(self):
        """Create the table used to save dropped indexes and constraints, if missing."""
        # Saving the statements in a table allows reloading even after crash
        self.sql('CREATE TABLE IF NOT EXISTS cwmassive_constraints(sql TEXT, insert_order SERIAL)')

    def drop_indexes(self, tablename):
        """Drop indexes of the given table, storing them in a table for later restore."""
        self._ensure_constraints_table()
        for name, query in self.table_indexes(tablename).items():
            # save the statement before dropping so the index can always be recreated
            self.sql('INSERT INTO cwmassive_constraints(sql) VALUES (%(sql)s)', {'sql': query})
            self.sql('DROP INDEX %s' % name)

    def drop_constraints(self, tablename):
        """Drop constraints of the given table, storing them in a table for later restore."""
        self._ensure_constraints_table()
        for name, query in self.table_constraints(tablename).items():
            # save the statement before dropping so the constraint can always be recreated
            self.sql('INSERT INTO cwmassive_constraints(sql) VALUES (%(sql)s)', {'sql': query})
            self.sql('ALTER TABLE %s DROP CONSTRAINT %s' % (tablename, name))

    def restore_indexes_and_constraints(self):
        """Restore indexes and constraints saved by `drop_indexes` / `drop_constraints`.

        Saved statements are replayed from the most recently saved to the oldest, then the
        ``cwmassive_constraints`` table is dropped. Does nothing if that table doesn't exist.
        """
        if not self.table_exists('cwmassive_constraints'):
            return
        cu = self.sql('SELECT sql, insert_order FROM cwmassive_constraints '
                      'ORDER BY insert_order DESC')
        for query, order in cu.fetchall():
            self.sql(query)
            # remove the statement once replayed
            self.sql('DELETE FROM cwmassive_constraints WHERE insert_order=%(order)s',
                     {'order': order})
        self.sql('DROP TABLE cwmassive_constraints')

    def table_exists(self, tablename):
        """Return True if the given table already exists in the database."""
        cu = self.sql('SELECT 1 from information_schema.tables '
                      'WHERE table_name=%(t)s AND table_schema=%(s)s',
                      {'t': tablename, 's': self.pg_schema})
        return bool(cu.fetchone())

    def table_indexes(self, tablename):
        """Return a dictionary of indexes {index name: index sql}, constraints included."""
        return {name: self._index_sql(name) for name in self._index_names(tablename)}

    def table_constraints(self, tablename):
        """Return a dictionary of constraints {constraint name: constraint sql}."""
        constraints = {}
        for name in self._constraint_names(tablename):
            # constraint names are not unique within a schema: look the definition up for
            # this very table
            query = self._constraint_sql(name, tablename)
            constraints[name] = 'ALTER TABLE %s ADD CONSTRAINT %s %s' % (tablename, name, query)
        return constraints

    def _index_names(self, tablename):
        """Return the names of all indexes in the given table (including constraints.)

        Primary key indexes are excluded.
        """
        cu = self.sql("SELECT c.relname FROM pg_catalog.pg_class c "
                      "JOIN pg_catalog.pg_index i ON i.indexrelid = c.oid "
                      "JOIN pg_catalog.pg_class c2 ON i.indrelid = c2.oid "
                      "LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner "
                      "LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace "
                      "WHERE c.relkind IN ('i','') "
                      " AND c2.relname = %(t)s "
                      " AND i.indisprimary = FALSE "
                      " AND n.nspname NOT IN ('pg_catalog', 'pg_toast') "
                      " AND pg_catalog.pg_table_is_visible(c.oid);", {'t': tablename})
        return [name for name, in cu.fetchall()]

    def _constraint_names(self, tablename):
        """Return the names of all constraints in the given table."""
        cu = self.sql("SELECT i.conname FROM pg_catalog.pg_class c "
                      "JOIN pg_catalog.pg_constraint i ON i.conrelid = c.oid "
                      "JOIN pg_catalog.pg_class c2 ON i.conrelid=c2.oid "
                      "LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner "
                      "LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace "
                      "WHERE c2.relname = %(t)s "
                      "AND n.nspname NOT IN ('pg_catalog', 'pg_toast') "
                      "AND pg_catalog.pg_table_is_visible(c.oid)", {'t': tablename})
        return [name for name, in cu.fetchall()]

    def _index_sql(self, name):
        """Return the SQL to be used to recreate the index of the given name."""
        return self.sql('SELECT pg_get_indexdef(c.oid) FROM pg_catalog.pg_class c '
                        'LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace '
                        'WHERE c.relname = %(r)s AND n.nspname=%(n)s',
                        {'r': name, 'n': self.pg_schema}).fetchone()[0]

    def _constraint_sql(self, name, tablename=None):
        """Return the SQL to be used to recreate the constraint.

        When ``tablename`` is given, only a constraint of that table is considered; otherwise
        the constraint is looked up by name within the configured schema only.
        """
        query = ('SELECT pg_get_constraintdef(c.oid) FROM pg_catalog.pg_constraint c '
                 'LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.connamespace ')
        condition = 'WHERE c.conname = %(r)s AND n.nspname=%(n)s'
        args = {'r': name, 'n': self.pg_schema}
        if tablename is not None:
            # restrict the lookup to the constraint attached to the requested table
            query += 'JOIN pg_catalog.pg_class t ON t.oid = c.conrelid '
            condition += ' AND t.relname = %(t)s'
            args['t'] = tablename
        return self.sql(query + condition, args).fetchone()[0]