dataimport/massive_store.py
author Julien Cristau <julien.cristau@logilab.fr>
Mon, 09 Nov 2015 16:21:29 +0100
changeset 10879 3193d9ede8dd
parent 10878 fda5e42037a9
child 10880 5fb592895e0f
permissions -rw-r--r--
[dataimport] drop extra indirection through MassiveObjectStore._initialized dict We already have self.__dict__, no need to add another one for a static set of keys.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     1
# coding: utf-8
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     2
# copyright 2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     3
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     4
#
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     5
# This file is part of CubicWeb.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     6
#
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     7
# CubicWeb is free software: you can redistribute it and/or modify it under the
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     8
# terms of the GNU Lesser General Public License as published by the Free
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     9
# Software Foundation, either version 2.1 of the License, or (at your option)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    10
# any later version.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    11
#
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    12
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT ANY
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    13
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    14
# A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    15
# details.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    16
#
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    17
# You should have received a copy of the GNU Lesser General Public License along
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    18
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    19
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    20
import logging
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    21
from datetime import datetime
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    22
from collections import defaultdict
10854
f437787d8849 [dataimport] import StringIO from io
Julien Cristau <julien.cristau@logilab.fr>
parents: 10853
diff changeset
    23
from io import StringIO
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    24
10859
375a8232e61c [dataimport] import range from six.moves
Julien Cristau <julien.cristau@logilab.fr>
parents: 10856
diff changeset
    25
from six.moves import range
375a8232e61c [dataimport] import range from six.moves
Julien Cristau <julien.cristau@logilab.fr>
parents: 10856
diff changeset
    26
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    27
from yams.constraints import SizeConstraint
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    28
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    29
from psycopg2 import ProgrammingError
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    30
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    31
from cubicweb.dataimport import stores, pgstore
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    32
from cubicweb.utils import make_uid
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    33
from cubicweb.server.sqlutils import SQL_PREFIX
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    34
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    35
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    36
class MassiveObjectStore(stores.RQLObjectStore):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    37
    """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    38
    Store for massive import of data, with delayed insertion of meta data.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    39
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    40
    WARNINGS:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    41
   - This store may be only used with PostgreSQL for now, as it relies
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    42
     on the COPY FROM method, and on specific PostgreSQL tables to get all
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    43
     the indexes.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    44
   - This store can only insert relations that are not inlined (i.e.,
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    45
     which do *not* have inlined=True in their definition in the schema).
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    46
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    47
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    48
   It should be used as follows:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    49
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    50
       store = MassiveObjectStore(cnx)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    51
       store.init_rtype_table('Person', 'lives_in', 'Location')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    52
       ...
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    53
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    54
       store.create_entity('Person', subj_iid_attribute=person_iid, ...)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    55
       store.create_entity('Location', obj_iid_attribute=location_iid, ...)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    56
       ...
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    57
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    58
       # subj_iid_attribute and obj_iid_attribute are argument names
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    59
       # chosen by the user (e.g. "cwuri"). These names can be identical.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    60
       # person_iid and location_iid are unique IDs and depend on the data
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    61
       # (e.g URI).
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    62
       store.flush()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    63
       store.relate_by_iid(person_iid, 'lives_in', location_iid)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    64
       # For example:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    65
       store.create_entity('Person',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    66
                           cwuri='http://dbpedia.org/toto',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    67
                           name='Toto')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    68
       store.create_entity('Location',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    69
                           uri='http://geonames.org/11111',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    70
                           name='Somewhere')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    71
       store.flush()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    72
       store.relate_by_iid('http://dbpedia.org/toto',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    73
                       'lives_in',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    74
                       'http://geonames.org/11111')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    75
       # Finally
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    76
       store.flush_meta_data()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    77
       store.convert_relations('Person', 'lives_in', 'Location',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    78
                               'subj_iid_attribute', 'obj_iid_attribute')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    79
       # For the previous example:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    80
       store.convert_relations('Person', 'lives_in', 'Location', 'cwuri', 'uri')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    81
       ...
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    82
       store.cleanup()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    83
    """
10872
ff4f94cfa2fb [dataimport] turn eids_seq_{start,range} into class attributes
Julien Cristau <julien.cristau@logilab.fr>
parents: 10871
diff changeset
    84
    # size of eid range reserved by the store for each batch
ff4f94cfa2fb [dataimport] turn eids_seq_{start,range} into class attributes
Julien Cristau <julien.cristau@logilab.fr>
parents: 10871
diff changeset
    85
    eids_seq_range = 10000
ff4f94cfa2fb [dataimport] turn eids_seq_{start,range} into class attributes
Julien Cristau <julien.cristau@logilab.fr>
parents: 10871
diff changeset
    86
    # initial eid (None means use the value in the db)
ff4f94cfa2fb [dataimport] turn eids_seq_{start,range} into class attributes
Julien Cristau <julien.cristau@logilab.fr>
parents: 10871
diff changeset
    87
    eids_seq_start = None
10873
0611466ce367 [dataimport] turn iid_maxsize into a class attribute
Julien Cristau <julien.cristau@logilab.fr>
parents: 10872
diff changeset
    88
    # max size of the iid, used to create the iid_eid conversion table
0611466ce367 [dataimport] turn iid_maxsize into a class attribute
Julien Cristau <julien.cristau@logilab.fr>
parents: 10872
diff changeset
    89
    iid_maxsize = 1024
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    90
10875
75d1b2d66f18 [dataimport] remove autoflush_metadata from MassiveObjectStore parameters
Julien Cristau <julien.cristau@logilab.fr>
parents: 10874
diff changeset
    91
    def __init__(self, cnx,
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    92
                 on_commit_callback=None, on_rollback_callback=None,
10865
2537df9fdd27 [dataimport] drop no more used parameter on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10864
diff changeset
    93
                 slave_mode=False,
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    94
                 source=None):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    95
        """ Create a MassiveObject store, with the following attributes:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    96
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    97
        - cnx: CubicWeb cnx
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    98
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    99
        super(MassiveObjectStore, self).__init__(cnx)
10877
a6a9a8fc94c3 [dataimport] rename logger from dataio.massiveimport to dataimport.massive_store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10876
diff changeset
   100
        self.logger = logging.getLogger('dataimport.massive_store')
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   101
        self._cnx = cnx
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   102
        self.sql = cnx.system_sql
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   103
        self._data_uri_relations = defaultdict(list)
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   104
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   105
        # etypes for which we have a uri_eid_%(etype)s table
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   106
        self._init_uri_eid = set()
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   107
        # etypes for which we have a uri_eid_%(e)s_idx index
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   108
        self._uri_eid_inserted = set()
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   109
        # set of rtypes for which we have a %(rtype)s_relation_iid_tmp table
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   110
        self._uri_rtypes = set()
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   111
        # set of etypes whose tables are created
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   112
        self._entities = set()
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   113
        # set of rtypes for which we have a %(rtype)s_relation_tmp table
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   114
        self._rtypes = set()
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   115
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   116
        self.sql = self._cnx.system_sql
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   117
        self.slave_mode = slave_mode
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   118
        self.size_constraints = get_size_constraints(cnx.vreg.schema)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   119
        self.default_values = get_default_values(cnx.vreg.schema)
10870
9dedf464596b [dataimport] remove pg_schema parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10869
diff changeset
   120
        pg_schema = cnx.repo.config.system_source_config.get('db-namespace', 'public')
9dedf464596b [dataimport] remove pg_schema parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10869
diff changeset
   121
        self._dbh = PGHelper(self._cnx, pg_schema)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   122
        self._data_entities = defaultdict(list)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   123
        self._data_relations = defaultdict(list)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   124
        self._now = datetime.now()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   125
        self._default_cwuri = make_uid('_auto_generated')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   126
        self._count_cwuri = 0
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   127
        self.on_commit_callback = on_commit_callback
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   128
        self.on_rollback_callback = on_rollback_callback
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   129
        # Do our meta tables already exist?
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   130
        self._init_massive_metatables()
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   131
        # Internal markers of initialization
10872
ff4f94cfa2fb [dataimport] turn eids_seq_{start,range} into class attributes
Julien Cristau <julien.cristau@logilab.fr>
parents: 10871
diff changeset
   132
        if self.eids_seq_start is not None and not self.slave_mode:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   133
            self._cnx.system_sql(self._cnx.repo.system_source.dbhelper.sql_restart_numrange(
10872
ff4f94cfa2fb [dataimport] turn eids_seq_{start,range} into class attributes
Julien Cristau <julien.cristau@logilab.fr>
parents: 10871
diff changeset
   134
                'entities_id_seq', initial_value=self.eids_seq_start + 1))
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   135
            cnx.commit()
10855
cd91f46fa633 [dataimport] use next builtin instead of next method on iterators
Julien Cristau <julien.cristau@logilab.fr>
parents: 10854
diff changeset
   136
        self.get_next_eid = lambda g=self._get_eid_gen(): next(g)
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   137
        # recreate then when self.finish() is called
10869
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   138
        if not self.slave_mode:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   139
            self._drop_metatables_constraints()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   140
        if source is None:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   141
            source = cnx.repo.system_source
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   142
        self.source = source
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   143
        self._etype_eid_idx = dict(cnx.execute('Any XN,X WHERE X is CWEType, X name XN'))
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   144
        cnx.read_security = False
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   145
        cnx.write_security = False
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   146
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   147
    ### INIT FUNCTIONS ########################################################
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   148
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   149
    def init_rtype_table(self, etype_from, rtype, etype_to):
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   150
        """ Build temporary table for standard rtype """
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   151
        # Create an uri_eid table for each etype for a better
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   152
        # control of which etype is concerned by a particular
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   153
        # possibly multivalued relation.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   154
        for etype in (etype_from, etype_to):
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   155
            if etype and etype not in self._init_uri_eid:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   156
                self._init_uri_eid_table(etype)
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   157
        if rtype not in self._uri_rtypes:
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   158
            # Create the temporary table
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   159
            if not self._cnx.repo.schema.rschema(rtype).inlined:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   160
                try:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   161
                    sql = 'CREATE TABLE %(r)s_relation_iid_tmp (uri_from character ' \
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   162
                          'varying(%(s)s), uri_to character varying(%(s)s))'
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   163
                    self.sql(sql % {'r': rtype, 's': self.iid_maxsize})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   164
                except ProgrammingError:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   165
                    # XXX Already exist (probably due to multiple import)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   166
                    pass
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   167
            else:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   168
                self.logger.warning("inlined relation %s: cannot insert it", rtype)
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   169
            # Add it to the initialized set
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   170
            self._uri_rtypes.add(rtype)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   171
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   172
    def _init_uri_eid_table(self, etype):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   173
        """ Build a temporary table for id/eid convertion
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   174
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   175
        try:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   176
            sql = "CREATE TABLE uri_eid_%(e)s (uri character varying(%(size)s), eid integer)"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   177
            self.sql(sql % {'e': etype.lower(), 'size': self.iid_maxsize,})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   178
        except ProgrammingError:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   179
            # XXX Already exist (probably due to multiple import)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   180
            pass
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   181
        # Add it to the initialized set
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   182
        self._init_uri_eid.add(etype)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   183
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   184
    def _init_massive_metatables(self):
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   185
        # Check if our tables are not already created (i.e. a restart)
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   186
        self._initialized_table_created = self._dbh.table_exists('cwmassive_initialized')
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   187
        self._constraint_table_created = self._dbh.table_exists('cwmassive_constraints')
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   188
        self._metadata_table_created = self._dbh.table_exists('cwmassive_metadata')
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   189
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   190
    ### RELATE FUNCTION #######################################################
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   191
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   192
    def relate_by_iid(self, iid_from, rtype, iid_to):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   193
        """Add new relation based on the internal id (iid)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   194
        of the entities (not the eid)"""
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   195
        # Push data
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   196
        if isinstance(iid_from, unicode):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   197
            iid_from = iid_from.encode('utf-8')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   198
        if isinstance(iid_to, unicode):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   199
            iid_to = iid_to.encode('utf-8')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   200
        self._data_uri_relations[rtype].append({'uri_from': iid_from, 'uri_to': iid_to})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   201
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   202
    ### FLUSH FUNCTIONS #######################################################
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   203
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   204
    def flush_relations(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   205
        """ Flush the relations data
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   206
        """
10856
b839167d99a4 [dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents: 10855
diff changeset
   207
        for rtype, data in self._data_uri_relations.items():
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   208
            if not data:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   209
                self.logger.info('No data for rtype %s', rtype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   210
            buf = StringIO('\n'.join(['%(uri_from)s\t%(uri_to)s' % d for d in data]))
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   211
            if not buf:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   212
                self.logger.info('Empty Buffer for rtype %s', rtype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   213
                continue
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   214
            cursor = self._cnx.cnxset.cu
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   215
            if not self._cnx.repo.schema.rschema(rtype).inlined:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   216
                cursor.copy_from(buf, '%s_relation_iid_tmp' % rtype.lower(),
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   217
                                 null='NULL', columns=('uri_from', 'uri_to'))
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   218
            else:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   219
                self.logger.warning("inlined relation %s: cannot insert it", rtype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   220
            buf.close()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   221
            # Clear data cache
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   222
            self._data_uri_relations[rtype] = []
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   223
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   224
    def fill_uri_eid_table(self, etype, uri_label):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   225
        """ Fill the uri_eid table
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   226
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   227
        self.logger.info('Fill uri_eid for etype %s', etype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   228
        sql = 'INSERT INTO uri_eid_%(e)s SELECT cw_%(l)s, cw_eid FROM cw_%(e)s'
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   229
        self.sql(sql % {'l': uri_label, 'e': etype.lower()})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   230
        # Add indexes
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   231
        self.sql('CREATE INDEX uri_eid_%(e)s_idx ON uri_eid_%(e)s' '(uri)' % {'e': etype.lower()})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   232
        # Set the etype as converted
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   233
        self._uri_eid_inserted.add(etype)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   234
        self.commit()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   235
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   236
    def convert_relations(self, etype_from, rtype, etype_to,
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   237
                          uri_label_from='cwuri', uri_label_to='cwuri'):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   238
        """ Flush the converted relations
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   239
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   240
        # Always flush relations to be sure
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   241
        self.logger.info('Convert relations %s %s %s', etype_from, rtype, etype_to)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   242
        self.flush_relations()
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   243
        if uri_label_from and etype_from not in self._uri_eid_inserted:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   244
            self.fill_uri_eid_table(etype_from, uri_label_from)
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   245
        if uri_label_to and etype_to not in self._uri_eid_inserted:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   246
            self.fill_uri_eid_table(etype_to, uri_label_to)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   247
        if self._cnx.repo.schema.rschema(rtype).inlined:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   248
            self.logger.warning("Can't insert inlined relation %s", rtype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   249
            return
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   250
        if uri_label_from and uri_label_to:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   251
            sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT O1.eid, O2.eid
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   252
            FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(ef)s as O1, uri_eid_%(et)s as O2
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   253
            WHERE O1.uri=T.uri_from AND O2.uri=T.uri_to AND NOT EXISTS (
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   254
            SELECT 1 FROM %(r)s_relation AS TT WHERE TT.eid_from=O1.eid AND TT.eid_to=O2.eid);
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   255
            '''
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   256
        elif uri_label_to:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   257
            sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   258
            CAST(T.uri_from AS INTEGER), O1.eid
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   259
            FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(et)s as O1
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   260
            WHERE O1.uri=T.uri_to AND NOT EXISTS (
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   261
            SELECT 1 FROM %(r)s_relation AS TT WHERE
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   262
            TT.eid_from=CAST(T.uri_from AS INTEGER) AND TT.eid_to=O1.eid);
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   263
            '''
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   264
        elif uri_label_from:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   265
            sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT O1.eid, T.uri_to
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   266
            O1.eid, CAST(T.uri_to AS INTEGER)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   267
            FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(ef)s as O1
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   268
            WHERE O1.uri=T.uri_from AND NOT EXISTS (
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   269
            SELECT 1 FROM %(r)s_relation AS TT WHERE
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   270
            TT.eid_from=O1.eid AND TT.eid_to=CAST(T.uri_to AS INTEGER));
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   271
            '''
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   272
        try:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   273
            self.sql(sql % {'r': rtype.lower(),
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   274
                            'et': etype_to.lower() if etype_to else u'',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   275
                            'ef': etype_from.lower() if etype_from else u''})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   276
        except Exception as ex:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   277
            self.logger.error("Can't insert relation %s: %s", rtype, ex)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   278
        self.commit()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   279
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   280
    ### SQL UTILITIES #########################################################
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   281
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   282
    def drop_and_store_indexes_constraints(self, tablename):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   283
        # Drop indexes and constraints
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   284
        if not self._constraint_table_created:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   285
            # Create a table to save the constraints
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   286
            # Allow reload even after crash
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   287
            sql = "CREATE TABLE cwmassive_constraints (origtable text, query text, type varchar(256))"
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   288
            self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   289
            self._constraint_table_created = True
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   290
        self._drop_table_constraints_indexes(tablename)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   291
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   292
    def _drop_table_constraints_indexes(self, tablename):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   293
        """ Drop and store table constraints and indexes """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   294
        indexes, constraints = self._dbh.application_indexes_constraints(tablename)
10856
b839167d99a4 [dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents: 10855
diff changeset
   295
        for name, query in constraints.items():
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   296
            sql = 'INSERT INTO cwmassive_constraints VALUES (%(e)s, %(c)s, %(t)s)'
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   297
            self.sql(sql, {'e': tablename, 'c': query, 't': 'constraint'})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   298
            sql = 'ALTER TABLE %s DROP CONSTRAINT %s CASCADE' % (tablename, name)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   299
            self.sql(sql)
10856
b839167d99a4 [dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents: 10855
diff changeset
   300
        for name, query in indexes.items():
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   301
            sql = 'INSERT INTO cwmassive_constraints VALUES (%(e)s, %(c)s, %(t)s)'
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   302
            self.sql(sql, {'e': tablename, 'c': query, 't': 'index'})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   303
            sql = 'DROP INDEX %s' % name
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   304
            self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   305
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   306
    def reapply_constraint_index(self, tablename):
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   307
        if not self._dbh.table_exists('cwmassive_constraints'):
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   308
            self.logger.info('The table cwmassive_constraints does not exist')
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   309
            return
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   310
        sql = 'SELECT query FROM cwmassive_constraints WHERE origtable = %(e)s'
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   311
        crs = self.sql(sql, {'e': tablename})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   312
        for query, in crs.fetchall():
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   313
            self.sql(query)
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   314
            self.sql('DELETE FROM cwmassive_constraints WHERE origtable = %(e)s '
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   315
                     'AND query = %(q)s', {'e': tablename, 'q': query})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   316
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   317
    def _drop_metatables_constraints(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   318
        """ Drop all the constraints for the meta data"""
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   319
        for tablename in ('created_by_relation', 'owned_by_relation',
10861
6205b89c3af6 [massive store] identity_relation should not be considered, is_relation seems missing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10859
diff changeset
   320
                          'is_instance_of_relation', 'is_relation',
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   321
                          'entities'):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   322
            self.drop_and_store_indexes_constraints(tablename)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   323
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   324
    def _create_metatables_constraints(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   325
        """ Create all the constraints for the meta data"""
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   326
        for tablename in ('entities',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   327
                          'created_by_relation', 'owned_by_relation',
10861
6205b89c3af6 [massive store] identity_relation should not be considered, is_relation seems missing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10859
diff changeset
   328
                          'is_instance_of_relation', 'is_relation'):
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   329
            # Indexes and constraints
10869
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   330
            self.reapply_constraint_index(tablename)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   331
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   332
    def init_relation_table(self, rtype):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   333
        """ Get and remove all indexes for performance sake """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   334
        # Create temporary table
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   335
        if not self.slave_mode and rtype not in self._rtypes:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   336
            sql = "CREATE TABLE %s_relation_tmp (eid_from integer, eid_to integer)" % rtype.lower()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   337
            self.sql(sql)
10869
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   338
            # Drop indexes and constraints
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   339
            tablename = '%s_relation' % rtype.lower()
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   340
            self.drop_and_store_indexes_constraints(tablename)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   341
            # Push the etype in the initialized table for easier restart
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   342
            self.init_create_initialized_table()
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   343
            sql = 'INSERT INTO cwmassive_initialized VALUES (%(e)s, %(t)s)'
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   344
            self.sql(sql, {'e': rtype, 't': 'rtype'})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   345
            # Mark rtype as "initialized" for faster check
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   346
            self._rtypes.add(rtype)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   347
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   348
    def init_create_initialized_table(self):
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   349
        """ Create the cwmassive initialized table
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   350
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   351
        if not self._initialized_table_created:
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   352
            sql = "CREATE TABLE cwmassive_initialized (retype text, type varchar(128))"
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   353
            self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   354
            self._initialized_table_created = True
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   355
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   356
    def init_etype_table(self, etype):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   357
        """ Add eid sequence to a particular etype table and
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   358
        remove all indexes for performance sake """
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   359
        if etype not in self._entities:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   360
            # Only for non-initialized etype and not slave mode store
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   361
            if not self.slave_mode:
10872
ff4f94cfa2fb [dataimport] turn eids_seq_{start,range} into class attributes
Julien Cristau <julien.cristau@logilab.fr>
parents: 10871
diff changeset
   362
                if self.eids_seq_range is None:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   363
                    # Eids are directly set by the entities_id_seq.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   364
                    # We attach this sequence to all the created etypes.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   365
                    sql = ("ALTER TABLE cw_%s ALTER COLUMN cw_eid "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   366
                           "SET DEFAULT nextval('entities_id_seq')" % etype.lower())
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   367
                    self.sql(sql)
10869
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   368
                # Drop indexes and constraints
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   369
                tablename = 'cw_%s' % etype.lower()
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   370
                self.drop_and_store_indexes_constraints(tablename)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   371
                # Push the etype in the initialized table for easier restart
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   372
                self.init_create_initialized_table()
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   373
                sql = 'INSERT INTO cwmassive_initialized VALUES (%(e)s, %(t)s)'
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   374
                self.sql(sql, {'e': etype, 't': 'etype'})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   375
            # Mark etype as "initialized" for faster check
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   376
            self._entities.add(etype)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   377
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   378
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   379
    ### ENTITIES CREATION #####################################################
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   380
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   381
    def _get_eid_gen(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   382
        """ Function getting the next eid. This is done by preselecting
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   383
        a given number of eids from the 'entities_id_seq', and then
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   384
        storing them"""
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   385
        while True:
10872
ff4f94cfa2fb [dataimport] turn eids_seq_{start,range} into class attributes
Julien Cristau <julien.cristau@logilab.fr>
parents: 10871
diff changeset
   386
            last_eid = self._cnx.repo.system_source.create_eid(self._cnx, self.eids_seq_range)
ff4f94cfa2fb [dataimport] turn eids_seq_{start,range} into class attributes
Julien Cristau <julien.cristau@logilab.fr>
parents: 10871
diff changeset
   387
            for eid in range(last_eid - self.eids_seq_range + 1, last_eid + 1):
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   388
                yield eid
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   389
10864
b7f4acf0473b [dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10863
diff changeset
   390
    def _apply_size_constraints(self, etype, kwargs):
b7f4acf0473b [dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10863
diff changeset
   391
        """Apply the size constraints for a given etype, attribute and value."""
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   392
        size_constraints = self.size_constraints[etype]
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   393
        for attr, value in kwargs.items():
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   394
            if value:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   395
                maxsize = size_constraints.get(attr)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   396
                if maxsize is not None and len(value) > maxsize:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   397
                    kwargs[attr] = value[:maxsize-4] + '...'
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   398
10864
b7f4acf0473b [dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10863
diff changeset
   399
    def _apply_default_values(self, etype, kwargs):
b7f4acf0473b [dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10863
diff changeset
   400
        """Apply the default values for a given etype, attribute and value."""
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   401
        default_values = self.default_values[etype]
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   402
        missing_keys = set(default_values) - set(kwargs)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   403
        kwargs.update((key, default_values[key]) for key in missing_keys)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   404
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   405
    # store api ################################################################
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   406
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   407
    def prepare_insert_entity(self, etype, **kwargs):
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   408
        """Given an entity type, attributes and inlined relations, returns the inserted entity's
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   409
        eid.
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   410
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   411
        # Init the table if necessary
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   412
        self.init_etype_table(etype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   413
        # Add meta data if not given
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   414
        if 'modification_date' not in kwargs:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   415
            kwargs['modification_date'] = self._now
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   416
        if 'creation_date' not in kwargs:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   417
            kwargs['creation_date'] = self._now
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   418
        if 'cwuri' not in kwargs:
10874
538e17174769 [dataimport] remove uri_param_name from MassiveObjectStore parameters
Julien Cristau <julien.cristau@logilab.fr>
parents: 10873
diff changeset
   419
            kwargs['cwuri'] = self._default_cwuri + str(self._count_cwuri)
538e17174769 [dataimport] remove uri_param_name from MassiveObjectStore parameters
Julien Cristau <julien.cristau@logilab.fr>
parents: 10873
diff changeset
   420
            self._count_cwuri += 1
10872
ff4f94cfa2fb [dataimport] turn eids_seq_{start,range} into class attributes
Julien Cristau <julien.cristau@logilab.fr>
parents: 10871
diff changeset
   421
        if 'eid' not in kwargs and self.eids_seq_range is not None:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   422
            # If eid is not given and the eids sequence is set,
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   423
            # use the value from the sequence
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   424
            kwargs['eid'] = self.get_next_eid()
10864
b7f4acf0473b [dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10863
diff changeset
   425
        self._apply_size_constraints(etype, kwargs)
b7f4acf0473b [dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10863
diff changeset
   426
        self._apply_default_values(etype, kwargs)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   427
        self._data_entities[etype].append(kwargs)
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   428
        return kwargs.get('eid')
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   429
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   430
    def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   431
        """Insert into the database a  relation ``rtype`` between entities with eids ``eid_from``
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   432
        and ``eid_to``.
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   433
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   434
        # Init the table if necessary
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   435
        self.init_relation_table(rtype)
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   436
        self._data_relations[rtype].append({'eid_from': eid_from, 'eid_to': eid_to})
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   437
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   438
    def flush(self):
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   439
        """Flush the data"""
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   440
        self.flush_entities()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   441
        self.flush_internal_relations()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   442
        self.flush_relations()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   443
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   444
    def commit(self):
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   445
        """Commit the database transaction."""
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   446
        self.on_commit()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   447
        super(MassiveObjectStore, self).commit()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   448
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   449
    def finish(self):
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   450
        """Remove temporary tables and columns."""
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   451
        self.logger.info("Start cleaning")
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   452
        if self.slave_mode:
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   453
            raise RuntimeError('Store cleanup is not allowed in slave mode')
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   454
        self.logger.info("Start cleaning")
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   455
        # Cleanup relations tables
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   456
        for etype in self._init_uri_eid:
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   457
            self.sql('DROP TABLE uri_eid_%s' % etype.lower())
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   458
        # Remove relations tables
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   459
        for rtype in self._uri_rtypes:
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   460
            if not self._cnx.repo.schema.rschema(rtype).inlined:
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   461
                self.sql('DROP TABLE %(r)s_relation_iid_tmp' % {'r': rtype})
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   462
            else:
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   463
                self.logger.warning("inlined relation %s: no cleanup to be done for it" % rtype)
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   464
        self.commit()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   465
        # Get all the initialized etypes/rtypes
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   466
        if self._dbh.table_exists('cwmassive_initialized'):
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   467
            crs = self.sql('SELECT retype, type FROM cwmassive_initialized')
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   468
            for retype, _type in crs.fetchall():
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   469
                self.logger.info('Cleanup for %s' % retype)
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   470
                if _type == 'etype':
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   471
                    # Cleanup entities tables - Recreate indexes
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   472
                    self._cleanup_entities(retype)
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   473
                elif _type == 'rtype':
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   474
                    # Cleanup relations tables
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   475
                    self._cleanup_relations(retype)
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   476
                self.sql('DELETE FROM cwmassive_initialized WHERE retype = %(e)s',
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   477
                         {'e': retype})
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   478
        # Create meta constraints (entities, is_instance_of, ...)
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   479
        self._create_metatables_constraints()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   480
        self.commit()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   481
        # Delete the meta data table
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   482
        for table_name in ('cwmassive_initialized', 'cwmassive_constraints', 'cwmassive_metadata'):
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   483
            if self._dbh.table_exists(table_name):
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   484
                self.sql('DROP TABLE %s' % table_name)
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   485
        self.commit()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   486
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   487
    ### FLUSH #################################################################
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   488
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   489
    def on_commit(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   490
        if self.on_commit_callback:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   491
            self.on_commit_callback()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   492
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   493
    def on_rollback(self, exc, etype, data):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   494
        if self.on_rollback_callback:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   495
            self.on_rollback_callback(exc, etype, data)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   496
            self._cnx.rollback()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   497
        else:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   498
            raise exc
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   499
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   500
    def flush_internal_relations(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   501
        """ Flush the relations data
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   502
        """
10856
b839167d99a4 [dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents: 10855
diff changeset
   503
        for rtype, data in self._data_relations.items():
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   504
            if not data:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   505
                # There is no data for these etype for this flush round.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   506
                continue
10871
1d4a94d04ec6 [dataimport] remove replace_sep parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10870
diff changeset
   507
            buf = pgstore._create_copyfrom_buffer(data, ('eid_from', 'eid_to'))
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   508
            if not buf:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   509
                # The buffer is empty. This is probably due to error in _create_copyfrom_buffer
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   510
                raise ValueError
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   511
            cursor = self._cnx.cnxset.cu
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   512
            # Push into the tmp table
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   513
            cursor.copy_from(buf, '%s_relation_tmp' % rtype.lower(),
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   514
                             null='NULL', columns=('eid_from', 'eid_to'))
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   515
            # Clear data cache
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   516
            self._data_relations[rtype] = []
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   517
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   518
    def flush_entities(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   519
        """ Flush the entities data
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   520
        """
10856
b839167d99a4 [dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents: 10855
diff changeset
   521
        for etype, data in self._data_entities.items():
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   522
            if not data:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   523
                # There is no data for these etype for this flush round.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   524
                continue
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   525
            # XXX It may be interresting to directly infer the columns'
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   526
            # names from the schema instead of using .keys()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   527
            columns = data[0].keys()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   528
            # XXX For now, the _create_copyfrom_buffer does a "row[column]"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   529
            # which can lead to a key error.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   530
            # Thus we should create dictionary with all the keys.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   531
            columns = set()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   532
            for d in data:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   533
                columns.update(d.keys())
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   534
            _data = []
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   535
            _base_data = dict.fromkeys(columns)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   536
            for d in data:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   537
                _d = _base_data.copy()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   538
                _d.update(d)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   539
                _data.append(_d)
10871
1d4a94d04ec6 [dataimport] remove replace_sep parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10870
diff changeset
   540
            buf = pgstore._create_copyfrom_buffer(_data, columns)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   541
            if not buf:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   542
                # The buffer is empty. This is probably due to error in _create_copyfrom_buffer
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   543
                raise ValueError('Error in buffer creation for etype %s' % etype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   544
            columns = ['cw_%s' % attr for attr in columns]
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   545
            cursor = self._cnx.cnxset.cu
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   546
            try:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   547
                cursor.copy_from(buf, 'cw_%s' % etype.lower(), null='NULL', columns=columns)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   548
            except Exception as exc:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   549
                self.on_rollback(exc, etype, data)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   550
            # Clear data cache
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   551
            self._data_entities[etype] = []
10875
75d1b2d66f18 [dataimport] remove autoflush_metadata from MassiveObjectStore parameters
Julien Cristau <julien.cristau@logilab.fr>
parents: 10874
diff changeset
   552
        self.flush_meta_data()
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   553
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   554
    def flush_meta_data(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   555
        """ Flush the meta data (entities table, is_instance table, ...)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   556
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   557
        if self.slave_mode:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   558
            raise RuntimeError('Flushing meta data is not allow in slave mode')
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   559
        if not self._dbh.table_exists('cwmassive_initialized'):
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   560
            self.logger.info('No information available for initialized etypes/rtypes')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   561
            return
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   562
        if not self._metadata_table_created:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   563
            # Keep the correctly flush meta data in database
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   564
            sql = "CREATE TABLE cwmassive_metadata (etype text)"
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   565
            self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   566
            self._metadata_table_created = True
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   567
        crs = self.sql('SELECT etype FROM cwmassive_metadata')
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   568
        already_flushed = set(e for e, in crs.fetchall())
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   569
        crs = self.sql('SELECT retype FROM cwmassive_initialized WHERE type = %(t)s',
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   570
                       {'t': 'etype'})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   571
        all_etypes = set(e for e, in crs.fetchall())
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   572
        for etype in all_etypes:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   573
            if etype not in already_flushed:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   574
                # Deals with meta data
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   575
                self.logger.info('Flushing meta data for %s' % etype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   576
                self.insert_massive_meta_data(etype)
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   577
                sql = 'INSERT INTO cwmassive_metadata VALUES (%(e)s)'
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   578
                self.sql(sql, {'e': etype})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   579
        # Final commit
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   580
        self.commit()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   581
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   582
    def _cleanup_entities(self, etype):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   583
        """ Cleanup etype table """
10872
ff4f94cfa2fb [dataimport] turn eids_seq_{start,range} into class attributes
Julien Cristau <julien.cristau@logilab.fr>
parents: 10871
diff changeset
   584
        if self.eids_seq_range is None:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   585
            # Remove DEFAULT eids sequence if added
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   586
            sql = 'ALTER TABLE cw_%s ALTER COLUMN cw_eid DROP DEFAULT;' % etype.lower()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   587
            self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   588
        # Create indexes and constraints
10869
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   589
        tablename = SQL_PREFIX + etype.lower()
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   590
        self.reapply_constraint_index(tablename)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   591
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   592
    def _cleanup_relations(self, rtype):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   593
        """ Cleanup rtype table """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   594
        # Push into relation table while removing duplicate
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   595
        sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   596
                 T.eid_from, T.eid_to FROM %(r)s_relation_tmp AS T
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   597
                 WHERE NOT EXISTS (SELECT 1 FROM %(r)s_relation AS TT WHERE
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   598
                 TT.eid_from=T.eid_from AND TT.eid_to=T.eid_to);''' % {'r': rtype}
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   599
        self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   600
        # Drop temporary relation table
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   601
        sql = ('DROP TABLE %(r)s_relation_tmp' % {'r': rtype.lower()})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   602
        self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   603
        # Create indexes and constraints
10869
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   604
        tablename = '%s_relation' % rtype.lower()
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   605
        self.reapply_constraint_index(tablename)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   606
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   607
    def insert_massive_meta_data(self, etype):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   608
        """ Massive insertion of meta data for a given etype, based on SQL statements.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   609
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   610
        # Push data - Use coalesce to avoid NULL (and get 0), if there is no
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   611
        # entities of this type in the entities table.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   612
        # Meta data relations
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   613
        self.metagen_push_relation(etype, self._cnx.user.eid, 'created_by_relation')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   614
        self.metagen_push_relation(etype, self._cnx.user.eid, 'owned_by_relation')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   615
        self.metagen_push_relation(etype, self.source.eid, 'cw_source_relation')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   616
        self.metagen_push_relation(etype, self._etype_eid_idx[etype], 'is_relation')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   617
        self.metagen_push_relation(etype, self._etype_eid_idx[etype], 'is_instance_of_relation')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   618
        sql = ("INSERT INTO entities (eid, type, asource, extid) "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   619
               "SELECT cw_eid, '%s', 'system', NULL FROM cw_%s "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   620
               "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   621
               % (etype, etype.lower()))
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   622
        self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   623
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   624
    def metagen_push_relation(self, etype, eid_to, rtype):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   625
        sql = ("INSERT INTO %s (eid_from, eid_to) SELECT cw_eid, %s FROM cw_%s "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   626
               "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   627
               % (rtype, eid_to, etype.lower()))
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   628
        self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   629
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   630
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   631
### CONSTRAINTS MANAGEMENT FUNCTIONS  ##########################################
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   632
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   633
def get_size_constraints(schema):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   634
    """analyzes yams ``schema`` and returns the list of size constraints.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   635
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   636
    The returned value is a dictionary mapping entity types to a
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   637
    sub-dictionnaries mapping attribute names -> max size.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   638
    """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   639
    size_constraints = {}
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   640
    # iterates on all entity types
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   641
    for eschema in schema.entities():
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   642
        # for each entity type, iterates on attribute definitions
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   643
        size_constraints[eschema.type] = eschema_constraints = {}
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   644
        for rschema, aschema in eschema.attribute_definitions():
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   645
            # for each attribute, if a size constraint is found,
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   646
            # append it to the size constraint list
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   647
            maxsize = None
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   648
            rdef = rschema.rdef(eschema, aschema)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   649
            for constraint in rdef.constraints:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   650
                if isinstance(constraint, SizeConstraint):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   651
                    maxsize = constraint.max
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   652
                    eschema_constraints[rschema.type] = maxsize
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   653
    return size_constraints
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   654
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   655
def get_default_values(schema):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   656
    """analyzes yams ``schema`` and returns the list of default values.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   657
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   658
    The returned value is a dictionary mapping entity types to a
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   659
    sub-dictionnaries mapping attribute names -> default values.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   660
    """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   661
    default_values = {}
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   662
    # iterates on all entity types
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   663
    for eschema in schema.entities():
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   664
        # for each entity type, iterates on attribute definitions
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   665
        default_values[eschema.type] = eschema_constraints = {}
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   666
        for rschema, _ in eschema.attribute_definitions():
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   667
            # for each attribute, if a size constraint is found,
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   668
            # append it to the size constraint list
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   669
            if eschema.default(rschema.type) is not None:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   670
                eschema_constraints[rschema.type] = eschema.default(rschema.type)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   671
    return default_values
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   672
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   673
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   674
class PGHelper(object):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   675
    def __init__(self, cnx, pg_schema='public'):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   676
        self.cnx = cnx
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   677
        # Deals with pg schema, see #3216686
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   678
        self.pg_schema = pg_schema
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   679
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   680
    def application_indexes_constraints(self, tablename):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   681
        """ Get all the indexes/constraints for a given tablename """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   682
        indexes = self.application_indexes(tablename)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   683
        constraints = self.application_constraints(tablename)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   684
        _indexes = {}
10856
b839167d99a4 [dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents: 10855
diff changeset
   685
        for name, query in indexes.items():
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   686
            # Remove pkey indexes (automatically created by constraints)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   687
            # Specific cases of primary key, see #3224079
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   688
            if name not in constraints:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   689
                _indexes[name] = query
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   690
        return _indexes, constraints
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   691
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   692
    def table_exists(self, table_name):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   693
        sql = "SELECT * from information_schema.tables WHERE table_name=%(t)s AND table_schema=%(s)s"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   694
        crs = self.cnx.system_sql(sql, {'t': table_name, 's': self.pg_schema})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   695
        res = crs.fetchall()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   696
        if res:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   697
            return True
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   698
        return False
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   699
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   700
    # def check_if_primary_key_exists_for_table(self, table_name):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   701
    #     sql = ("SELECT constraint_name FROM information_schema.table_constraints "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   702
    #            "WHERE constraint_type = 'PRIMARY KEY' AND table_name=%(t)s AND table_schema=%(s)s")
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   703
    #     crs = self.cnx.system_sql(sql, {'t': table_name, 's': self.pg_schema})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   704
    #     res = crs.fetchall()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   705
    #     if res:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   706
    #         return True
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   707
    #     return False
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   708
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   709
    def index_query(self, name):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   710
        """Get the request to be used to recreate the index"""
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   711
        return self.cnx.system_sql("SELECT pg_get_indexdef(c.oid) "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   712
                                   "from pg_catalog.pg_class c "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   713
                                   "LEFT JOIN pg_catalog.pg_namespace n "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   714
                                   "ON n.oid = c.relnamespace "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   715
                                   "WHERE c.relname = %(r)s AND n.nspname=%(n)s",
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   716
                                   {'r': name, 'n': self.pg_schema}).fetchone()[0]
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   717
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   718
    def constraint_query(self, name):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   719
        """Get the request to be used to recreate the constraint"""
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   720
        return self.cnx.system_sql("SELECT pg_get_constraintdef(c.oid) "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   721
                                   "from pg_catalog.pg_constraint c "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   722
                                   "LEFT JOIN pg_catalog.pg_namespace n "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   723
                                   "ON n.oid = c.connamespace "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   724
                                   "WHERE c.conname = %(r)s AND n.nspname=%(n)s",
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   725
                                   {'r': name, 'n': self.pg_schema}).fetchone()[0]
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   726
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   727
    def application_indexes(self, tablename):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   728
        """ Iterate over all the indexes """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   729
        # This SQL query (cf http://www.postgresql.org/message-id/432F450F.4080700@squiz.net)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   730
        # aims at getting all the indexes for each table.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   731
        sql = '''SELECT c.relname as "Name"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   732
        FROM pg_catalog.pg_class c
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   733
        JOIN pg_catalog.pg_index i ON i.indexrelid = c.oid
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   734
        JOIN pg_catalog.pg_class c2 ON i.indrelid = c2.oid
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   735
        LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   736
        LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   737
        WHERE c.relkind IN ('i','')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   738
        AND c2.relname = '%s'
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   739
        AND n.nspname NOT IN ('pg_catalog', 'pg_toast')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   740
        AND pg_catalog.pg_table_is_visible(c.oid);''' % tablename
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   741
        indexes_list = self.cnx.system_sql(sql).fetchall()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   742
        indexes = {}
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   743
        for name, in indexes_list:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   744
            indexes[name] = self.index_query(name)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   745
        return indexes
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   746
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   747
    def application_constraints(self, tablename):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   748
        """ Iterate over all the constraints """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   749
        sql = '''SELECT i.conname as "Name"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   750
                 FROM pg_catalog.pg_class c JOIN pg_catalog.pg_constraint i
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   751
                 ON i.conrelid = c.oid JOIN pg_catalog.pg_class c2 ON i.conrelid=c2.oid
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   752
                 LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   753
                 LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   754
                 WHERE c2.relname = '%s' AND n.nspname NOT IN ('pg_catalog', 'pg_toast')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   755
                 AND pg_catalog.pg_table_is_visible(c.oid);''' % tablename
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   756
        indexes_list = self.cnx.system_sql(sql).fetchall()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   757
        constraints = {}
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   758
        for name, in indexes_list:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   759
            query = self.constraint_query(name)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   760
            constraints[name] = 'ALTER TABLE %s ADD CONSTRAINT %s %s' % (tablename, name, query)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   761
        return constraints