cubicweb/dataimport/massive_store.py
author Nicolas Chauvat <nicolas.chauvat@logilab.fr>
Thu, 19 Dec 2019 12:15:29 +0100
branch3.23
changeset 12839 1dfe1cd9946a
parent 11331 f2ff82dfcd5c
child 11704 43aefc671538
permissions -rw-r--r--
closing branch 3.23
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     1
# coding: utf-8
11305
118d83e65ca8 [dataimport] remove useless assignment in massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11057
diff changeset
     2
# copyright 2015-2016 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     3
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     4
#
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     5
# This file is part of CubicWeb.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     6
#
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     7
# CubicWeb is free software: you can redistribute it and/or modify it under the
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     8
# terms of the GNU Lesser General Public License as published by the Free
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     9
# Software Foundation, either version 2.1 of the License, or (at your option)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    10
# any later version.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    11
#
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    12
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT ANY
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    13
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    14
# A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    15
# details.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    16
#
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    17
# You should have received a copy of the GNU Lesser General Public License along
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    18
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    19
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    20
import logging
11326
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
    21
from copy import copy
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    22
from collections import defaultdict
10854
f437787d8849 [dataimport] import StringIO from io
Julien Cristau <julien.cristau@logilab.fr>
parents: 10853
diff changeset
    23
from io import StringIO
11316
36c7cd362fc7 [dataimport] add a .schema shortcut attribute on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11315
diff changeset
    24
from itertools import chain
11328
9f2d7da47526 [dataimport] test and fix external source support for the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11326
diff changeset
    25
from base64 import b64encode
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    26
10859
375a8232e61c [dataimport] import range from six.moves
Julien Cristau <julien.cristau@logilab.fr>
parents: 10856
diff changeset
    27
from six.moves import range
375a8232e61c [dataimport] import range from six.moves
Julien Cristau <julien.cristau@logilab.fr>
parents: 10856
diff changeset
    28
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    29
from yams.constraints import SizeConstraint
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    30
11313
682b15eb2dd2 [dataimport] flake8
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11312
diff changeset
    31
from cubicweb.schema import PURE_VIRTUAL_RTYPES
11020
c8c8f6a6147f [dataimport] massive_store: drop all constraints in __init__ and restore all in finish
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents: 11019
diff changeset
    32
from cubicweb.server.schema2sql import rschema_has_table
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    33
from cubicweb.dataimport import stores, pgstore
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    34
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    35
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    36
class MassiveObjectStore(stores.RQLObjectStore):
11331
f2ff82dfcd5c [dataimport] add a bit of extra-documentation on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11330
diff changeset
    37
    """Store for massive import of data, with delayed insertion of meta data.
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    38
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    39
    WARNINGS:
11315
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
    40
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
    41
    - This store may only be used with PostgreSQL for now, as it relies
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
    42
      on the COPY FROM method, and on specific PostgreSQL tables to get all
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
    43
      the indexes.
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    44
11315
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
    45
    - This store can only insert relations that are not inlined (i.e.,
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
    46
      which do *not* have inlined=True in their definition in the schema), unless they are
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
    47
      specified as entity attributes.
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
    48
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
    49
    It should be used as follows:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    50
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    51
       store = MassiveObjectStore(cnx)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    52
       store.init_rtype_table('Person', 'lives_in', 'Location')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    53
       ...
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    54
10882
634cc266e48f [dataimport] fix doc string to stop referring to the old API
Julien Cristau <julien.cristau@logilab.fr>
parents: 10881
diff changeset
    55
       store.prepare_insert_entity('Person', subj_iid_attribute=person_iid, ...)
634cc266e48f [dataimport] fix doc string to stop referring to the old API
Julien Cristau <julien.cristau@logilab.fr>
parents: 10881
diff changeset
    56
       store.prepare_insert_entity('Location', obj_iid_attribute=location_iid, ...)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    57
       ...
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    58
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    59
       # subj_iid_attribute and obj_iid_attribute are argument names
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    60
       # chosen by the user (e.g. "cwuri"). These names can be identical.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    61
       # person_iid and location_iid are unique IDs and depend on the data
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    62
       # (e.g. URI).
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    63
       store.flush()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    64
       store.relate_by_iid(person_iid, 'lives_in', location_iid)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    65
       # For example:
10882
634cc266e48f [dataimport] fix doc string to stop referring to the old API
Julien Cristau <julien.cristau@logilab.fr>
parents: 10881
diff changeset
    66
       store.prepare_insert_entity('Person',
634cc266e48f [dataimport] fix doc string to stop referring to the old API
Julien Cristau <julien.cristau@logilab.fr>
parents: 10881
diff changeset
    67
                                   cwuri='http://dbpedia.org/toto',
634cc266e48f [dataimport] fix doc string to stop referring to the old API
Julien Cristau <julien.cristau@logilab.fr>
parents: 10881
diff changeset
    68
                                   name='Toto')
634cc266e48f [dataimport] fix doc string to stop referring to the old API
Julien Cristau <julien.cristau@logilab.fr>
parents: 10881
diff changeset
    69
       store.prepare_insert_entity('Location',
634cc266e48f [dataimport] fix doc string to stop referring to the old API
Julien Cristau <julien.cristau@logilab.fr>
parents: 10881
diff changeset
    70
                                   uri='http://geonames.org/11111',
634cc266e48f [dataimport] fix doc string to stop referring to the old API
Julien Cristau <julien.cristau@logilab.fr>
parents: 10881
diff changeset
    71
                                   name='Somewhere')
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    72
       store.flush()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    73
       store.relate_by_iid('http://dbpedia.org/toto',
10882
634cc266e48f [dataimport] fix doc string to stop referring to the old API
Julien Cristau <julien.cristau@logilab.fr>
parents: 10881
diff changeset
    74
                           'lives_in',
634cc266e48f [dataimport] fix doc string to stop referring to the old API
Julien Cristau <julien.cristau@logilab.fr>
parents: 10881
diff changeset
    75
                           'http://geonames.org/11111')
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    76
       # Finally
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    77
       store.convert_relations('Person', 'lives_in', 'Location',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    78
                               'subj_iid_attribute', 'obj_iid_attribute')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    79
       # For the previous example:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    80
       store.convert_relations('Person', 'lives_in', 'Location', 'cwuri', 'uri')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    81
       ...
10882
634cc266e48f [dataimport] fix doc string to stop referring to the old API
Julien Cristau <julien.cristau@logilab.fr>
parents: 10881
diff changeset
    82
       store.commit()
634cc266e48f [dataimport] fix doc string to stop referring to the old API
Julien Cristau <julien.cristau@logilab.fr>
parents: 10881
diff changeset
    83
       store.finish()
11331
f2ff82dfcd5c [dataimport] add a bit of extra-documentation on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11330
diff changeset
    84
f2ff82dfcd5c [dataimport] add a bit of extra-documentation on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11330
diff changeset
    85
    Full-text indexing is not handled; you'll have to reindex the proper entity types yourself
f2ff82dfcd5c [dataimport] add a bit of extra-documentation on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11330
diff changeset
    86
    if desired.
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    87
    """
10873
0611466ce367 [dataimport] turn iid_maxsize into a class attribute
Julien Cristau <julien.cristau@logilab.fr>
parents: 10872
diff changeset
    88
    # max size of the iid, used to create the iid_eid conversion table
0611466ce367 [dataimport] turn iid_maxsize into a class attribute
Julien Cristau <julien.cristau@logilab.fr>
parents: 10872
diff changeset
    89
    iid_maxsize = 1024
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    90
10875
75d1b2d66f18 [dataimport] remove autoflush_metadata from MassiveObjectStore parameters
Julien Cristau <julien.cristau@logilab.fr>
parents: 10874
diff changeset
    91
    def __init__(self, cnx,
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    92
                 on_commit_callback=None, on_rollback_callback=None,
10865
2537df9fdd27 [dataimport] drop no more used parameter on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10864
diff changeset
    93
                 slave_mode=False,
11326
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
    94
                 eids_seq_range=10000,
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
    95
                 metagen=None):
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    96
        """ Create a MassiveObjectStore with the following arguments:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    97
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    98
        - cnx: CubicWeb cnx
11028
66f94d7f9ca7 [dataimport] make eids_seq_range as massive store instance attribute again
Julien Cristau <julien.cristau@logilab.fr>
parents: 11027
diff changeset
    99
        - eids_seq_range: size of eid range reserved by the store for each batch
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   100
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   101
        super(MassiveObjectStore, self).__init__(cnx)
11315
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
   102
        self.on_commit_callback = on_commit_callback
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
   103
        self.on_rollback_callback = on_rollback_callback
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
   104
        self.slave_mode = slave_mode
11028
66f94d7f9ca7 [dataimport] make eids_seq_range as massive store instance attribute again
Julien Cristau <julien.cristau@logilab.fr>
parents: 11027
diff changeset
   105
        self.eids_seq_range = eids_seq_range
11326
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   106
        if metagen is None:
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   107
            metagen = stores.MetadataGenerator(cnx)
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   108
        self.metagen = metagen
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   109
11315
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
   110
        self.logger = logging.getLogger('dataimport.massive_store')
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
   111
        self.sql = cnx.system_sql
11316
36c7cd362fc7 [dataimport] add a .schema shortcut attribute on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11315
diff changeset
   112
        self.schema = self._cnx.vreg.schema
36c7cd362fc7 [dataimport] add a .schema shortcut attribute on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11315
diff changeset
   113
        self.default_values = get_default_values(self.schema)
11315
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
   114
        self.get_next_eid = lambda g=self._get_eid_gen(): next(g)
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
   115
        self._dbh = PGHelper(cnx)
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
   116
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
   117
        cnx.read_security = False
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
   118
        cnx.write_security = False
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
   119
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
   120
        self._data_entities = defaultdict(list)
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
   121
        self._data_relations = defaultdict(list)
11320
78da04c853dc [dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11319
diff changeset
   122
        self._initialized = set()
78da04c853dc [dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11319
diff changeset
   123
        # uri handling
78da04c853dc [dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11319
diff changeset
   124
        self._data_uri_relations = defaultdict(list)
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   125
        # etypes for which we have a uri_eid_%(etype)s table
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   126
        self._init_uri_eid = set()
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   127
        # etypes for which we have a uri_eid_%(e)s_idx index
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   128
        self._uri_eid_inserted = set()
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   129
        # set of rtypes for which we have a %(rtype)s_relation_iid_tmp table
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   130
        self._uri_rtypes = set()
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   131
10869
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   132
        if not self.slave_mode:
11315
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
   133
            # drop constraint and metadata table, they will be recreated when self.finish() is
ad826d81e88e [dataimport] rework massive store's __init__
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11314
diff changeset
   134
            # called
11020
c8c8f6a6147f [dataimport] massive_store: drop all constraints in __init__ and restore all in finish
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents: 11019
diff changeset
   135
            self._drop_all_constraints()
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   136
            self._drop_metatables_constraints()
11326
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   137
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   138
    def _get_eid_gen(self):
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   139
        """ Generator yielding the next eid. This is done by preselecting
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   140
        a given number of eids from the 'entities_id_seq', and then
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   141
        storing them"""
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   142
        while True:
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   143
            last_eid = self._cnx.repo.system_source.create_eid(self._cnx, self.eids_seq_range)
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   144
            for eid in range(last_eid - self.eids_seq_range + 1, last_eid + 1):
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   145
                yield eid
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   146
11320
78da04c853dc [dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11319
diff changeset
   147
    # URI related things #######################################################
11020
c8c8f6a6147f [dataimport] massive_store: drop all constraints in __init__ and restore all in finish
Samuel Trégouët <samuel.tregouet@logilab.fr>
parents: 11019
diff changeset
   148
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   149
    def init_rtype_table(self, etype_from, rtype, etype_to):
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   150
        """ Build temporary table for standard rtype """
11320
78da04c853dc [dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11319
diff changeset
   151
        # Create a uri_eid table for each etype, for better control over which etype is concerned by
78da04c853dc [dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11319
diff changeset
   152
        # a particular possibly multivalued relation.
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   153
        for etype in (etype_from, etype_to):
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   154
            if etype and etype not in self._init_uri_eid:
11319
fe90d07f3afa [dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11318
diff changeset
   155
                self._init_uri_eid.add(etype)
fe90d07f3afa [dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11318
diff changeset
   156
                self.sql('CREATE TABLE IF NOT EXISTS uri_eid_%(e)s'
fe90d07f3afa [dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11318
diff changeset
   157
                         '(uri character varying(%(size)s), eid integer)'
fe90d07f3afa [dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11318
diff changeset
   158
                         % {'e': etype.lower(), 'size': self.iid_maxsize})
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   159
        if rtype not in self._uri_rtypes:
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   160
            # Create the temporary table
11316
36c7cd362fc7 [dataimport] add a .schema shortcut attribute on the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11315
diff changeset
   161
            if not self.schema.rschema(rtype).inlined:
11311
fd45fc498c1b [dataimport] use IF EXISTS when possible
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11310
diff changeset
   162
                self.sql('CREATE TABLE IF NOT EXISTS %(r)s_relation_iid_tmp'
fd45fc498c1b [dataimport] use IF EXISTS when possible
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11310
diff changeset
   163
                         '(uri_from character varying(%(s)s), uri_to character varying(%(s)s))'
fd45fc498c1b [dataimport] use IF EXISTS when possible
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11310
diff changeset
   164
                         % {'r': rtype, 's': self.iid_maxsize})
11319
fe90d07f3afa [dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11318
diff changeset
   165
                self._uri_rtypes.add(rtype)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   166
            else:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   167
                self.logger.warning("inlined relation %s: cannot insert it", rtype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   168
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   169
    def relate_by_iid(self, iid_from, rtype, iid_to):
        """Queue a relation designated by internal ids (iid) instead of eids.

        Each iid that is a ``unicode`` instance is encoded to UTF-8, then the
        pair is appended to the pending list kept for ``rtype`` in
        ``self._data_uri_relations``.
        """
        subject_iid, object_iid = (
            iid.encode('utf-8') if isinstance(iid, unicode) else iid
            for iid in (iid_from, iid_to)
        )
        pending = self._data_uri_relations[rtype]
        pending.append({'uri_from': subject_iid, 'uri_to': object_iid})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   178
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   179
    def flush_relations(self):
        """Copy the buffered iid-based relations into their temporary tables.

        For every relation type of ``self._data_uri_relations`` that has pending
        rows, the rows are serialized as tab-separated ``uri_from``/``uri_to``
        lines and sent with ``copy_from`` to the ``<rtype>_relation_iid_tmp``
        table; the buffer of that relation type is then emptied.

        Relation types without pending rows are skipped.  Inlined relations are
        not copied: a warning is logged and their buffered rows are discarded.
        """
        for rtype, data in self._data_uri_relations.items():
            if not data:
                # Nothing buffered for this relation type: skip it rather than
                # running copy_from with an empty buffer.
                self.logger.info('No data for rtype %s', rtype)
                continue
            if self.schema.rschema(rtype).inlined:
                self.logger.warning("inlined relation %s: cannot insert it", rtype)
            else:
                buf = StringIO('\n'.join(['%(uri_from)s\t%(uri_to)s' % d for d in data]))
                try:
                    cursor = self._cnx.cnxset.cu
                    cursor.copy_from(buf, '%s_relation_iid_tmp' % rtype.lower(),
                                     null='NULL', columns=('uri_from', 'uri_to'))
                finally:
                    # Release the buffer even when the copy fails
                    buf.close()
            # Clear data cache (rebinding an existing key is safe while iterating)
            self._data_uri_relations[rtype] = []
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   198
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   199
    def fill_uri_eid_table(self, etype, uri_label):
        """Populate and index the ``uri_eid_<etype>`` table, once per entity type.

        The table receives the ``cw_<uri_label>`` and ``cw_eid`` columns of the
        ``cw_<etype>`` table, then an index is created on its ``uri`` column.
        Entity types already recorded in ``self._uri_eid_inserted`` are left
        untouched.
        """
        if etype in self._uri_eid_inserted:
            return
        self._uri_eid_inserted.add(etype)
        self.logger.info('Fill uri_eid for etype %s', etype)
        names = {'l': uri_label, 'e': etype.lower()}
        fill_query = 'INSERT INTO uri_eid_%(e)s SELECT cw_%(l)s, cw_eid FROM cw_%(e)s' % names
        self.sql(fill_query)
        index_query = 'CREATE INDEX uri_eid_%(e)s_idx ON uri_eid_%(e)s(uri)' % names
        self.sql(index_query)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   209
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   210
    def convert_relations(self, etype_from, rtype, etype_to,
                          uri_label_from='cwuri', uri_label_to='cwuri'):
        """Turn the buffered iid-based relations of ``rtype`` into relation rows.

        Pending relations are flushed first, then the ``uri_eid_<etype>`` tables
        are filled for each end that has a uri label.  Rows of
        ``<rtype>_relation_iid_tmp`` are finally inserted into
        ``<rtype>_relation``, skipping pairs that are already there.

        :param etype_from: entity type of the subject (may be empty when
          ``uri_label_from`` is false)
        :param rtype: relation type; inlined relations are not handled (a
          warning is logged and nothing is inserted)
        :param etype_to: entity type of the object (may be empty when
          ``uri_label_to`` is false)
        :param uri_label_from: attribute holding the subject's iid; when false,
          ``uri_from`` values are cast to integer and used as eids
        :param uri_label_to: attribute holding the object's iid; when false,
          ``uri_to`` values are cast to integer and used as eids

        Errors raised by the insertion are logged, not propagated.
        """
        self.logger.info('Convert relations %s %s %s', etype_from, rtype, etype_to)
        # Always flush relations to be sure
        self.flush_relations()
        if uri_label_from:
            self.fill_uri_eid_table(etype_from, uri_label_from)
        if uri_label_to:
            self.fill_uri_eid_table(etype_to, uri_label_to)
        if self.schema.rschema(rtype).inlined:
            self.logger.warning("Can't insert inlined relation %s", rtype)
            return
        if uri_label_from and uri_label_to:
            sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT O1.eid, O2.eid
            FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(ef)s as O1, uri_eid_%(et)s as O2
            WHERE O1.uri=T.uri_from AND O2.uri=T.uri_to AND NOT EXISTS (
            SELECT 1 FROM %(r)s_relation AS TT WHERE TT.eid_from=O1.eid AND TT.eid_to=O2.eid);
            '''
        elif uri_label_to:
            sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT
            CAST(T.uri_from AS INTEGER), O1.eid
            FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(et)s as O1
            WHERE O1.uri=T.uri_to AND NOT EXISTS (
            SELECT 1 FROM %(r)s_relation AS TT WHERE
            TT.eid_from=CAST(T.uri_from AS INTEGER) AND TT.eid_to=O1.eid);
            '''
        elif uri_label_from:
            # The select list must hold exactly the two inserted columns
            sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT
            O1.eid, CAST(T.uri_to AS INTEGER)
            FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(ef)s as O1
            WHERE O1.uri=T.uri_from AND NOT EXISTS (
            SELECT 1 FROM %(r)s_relation AS TT WHERE
            TT.eid_from=O1.eid AND TT.eid_to=CAST(T.uri_to AS INTEGER));
            '''
        else:
            # No query exists for this combination: report it explicitly
            # rather than failing on an unbound variable below.
            self.logger.error("Can't insert relation %s: %s", rtype,
                              "no uri label given for either end of the relation")
            return
        try:
            self.sql(sql % {'r': rtype.lower(),
                            'et': etype_to.lower() if etype_to else u'',
                            'ef': etype_from.lower() if etype_from else u''})
        except Exception as ex:
            # Best effort: a failing relation must not abort the whole import
            self.logger.error("Can't insert relation %s: %s", rtype, ex)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   252
11322
21316020eae3 [dataimport] move cwmassive_constraint temporary table handling to the PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11321
diff changeset
   253
    # SQL utilities #########################################################
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   254
11320
78da04c853dc [dataimport] move everything related to the "URI handling" feature of the massive store together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11319
diff changeset
   255
    def _drop_all_constraints(self):
        """Call ``self._dbh.drop_constraints`` on every table known to the schema.

        Tables are visited in this order: the ``cw_<etype>`` table of each
        non-final entity type, the ``<rtype>_relation`` table of each relation
        type for which ``rschema_has_table`` is true, then ``entities``.
        """
        # Both schema listings are fetched before the first table is handled
        eschemas = self.schema.entities()
        rschemas = self.schema.relations()
        for eschema in eschemas:
            if eschema.final:
                continue
            self._dbh.drop_constraints('cw_%s' % eschema.type.lower())
        for rschema in rschemas:
            if not rschema_has_table(rschema, skip_relations=PURE_VIRTUAL_RTYPES):
                continue
            self._dbh.drop_constraints('%s_relation' % rschema.type.lower())
        self._dbh.drop_constraints('entities')
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   262
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   263
    def _drop_metatables_constraints(self):
        """Call ``self._dbh.drop_indexes`` on each of the metadata tables."""
        metadata_tables = (
            'created_by_relation',
            'owned_by_relation',
            'is_instance_of_relation',
            'is_relation',
            'entities',
        )
        for metadata_table in metadata_tables:
            self._dbh.drop_indexes(metadata_table)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   269
11026
ce9b3886955d [dataimport] remove eids_seq_start attribute from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 11025
diff changeset
   270
    def restart_eid_sequence(self, start_eid):
        """Restart the ``entities_id_seq`` sequence at ``start_eid`` and commit.

        The statement is built by the system source's database helper
        (``sql_restart_numrange``).
        """
        dbhelper = self._cnx.repo.system_source.dbhelper
        restart_query = dbhelper.sql_restart_numrange('entities_id_seq',
                                                      initial_value=start_eid)
        self.sql(restart_query)
        self._cnx.commit()
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   274
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   275
    # store api ################################################################
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   276
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   277
    def prepare_insert_entity(self, etype, **kwargs):
        """Buffer a new entity of type ``etype`` and return its eid.

        ``kwargs`` hold the attributes and inlined relations of the entity.  An
        ``eid`` key may be given; otherwise the eid comes from
        ``self.get_next_eid()``.  The resulting data dictionary is appended to
        ``self._data_entities[etype]``.
        """
        if not (self.slave_mode or etype in self._initialized):
            # First entity of this type seen by a non-slave store: prepare its table
            self._initialized.add(etype)
            lowered = etype.lower()
            self._dbh.drop_indexes('cw_%s' % lowered)
            self.sql('CREATE TABLE IF NOT EXISTS cwmassive_initialized'
                     '(retype text, type varchar(128))')
            self.sql("INSERT INTO cwmassive_initialized VALUES (%(e)s, 'etype')", {'e': etype})
            self.sql('ALTER TABLE cw_%s ADD COLUMN extid VARCHAR(256)' % lowered)
        # base_etype_attrs is @cached, a copy is necessary
        data = copy(self.metagen.base_etype_attrs(etype))
        data.update(kwargs)
        if 'eid' not in data:
            # If eid is not given and the eids sequence is set, use the value from the sequence
            data['eid'] = self.get_next_eid()
        # XXX default values could be set once for all in base entity
        defaults = self.default_values[etype]
        for key in set(defaults) - set(data):
            data[key] = defaults[key]
        raw_extid = self.metagen.entity_extid(etype, data['eid'], data)
        if raw_extid is None:
            data['extid'] = None
        else:
            data['extid'] = b64encode(raw_extid).decode('ascii')
        self.metagen.init_entity_attrs(etype, data['eid'], data)
        self._data_entities[etype].append(data)
        return data['eid']
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   306
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   307
    def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
        """Buffer a relation ``rtype`` between the entities of eids ``eid_from``
        and ``eid_to``.

        Relation must not be inlined.
        """
        first_use = not self.slave_mode and rtype not in self._initialized
        if first_use:
            assert not self._cnx.vreg.schema.rschema(rtype).inlined
            self._initialized.add(rtype)
            # Prepare the relation tables and record the relation type as initialized
            self._dbh.drop_indexes('%s_relation' % rtype.lower())
            create_tmp = ('CREATE TABLE %s_relation_tmp (eid_from integer, eid_to integer)'
                          % rtype.lower())
            self.sql(create_tmp)
            self.sql('CREATE TABLE IF NOT EXISTS cwmassive_initialized'
                     '(retype text, type varchar(128))')
            self.sql("INSERT INTO cwmassive_initialized VALUES (%(e)s, 'rtype')", {'e': rtype})
        pending = self._data_relations[rtype]
        pending.append({'eid_from': eid_from, 'eid_to': eid_to})
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   323
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   324
    def flush(self):
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   325
        """Flush the data"""
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   326
        self.flush_entities()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   327
        self.flush_internal_relations()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   328
        self.flush_relations()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   329
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   330
    def commit(self):
        """Run the ``on_commit`` hook, then commit through the parent store class."""
        # the hook runs before the actual commit
        self.on_commit()
        super(MassiveObjectStore, self).commit()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   334
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   335
    def finish(self):
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   336
        """Remove temporary tables and columns."""
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   337
        self.logger.info("Start cleaning")
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   338
        if self.slave_mode:
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   339
            raise RuntimeError('Store cleanup is not allowed in slave mode')
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   340
        self.logger.info("Start cleaning")
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   341
        # Cleanup relations tables
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   342
        for etype in self._init_uri_eid:
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   343
            self.sql('DROP TABLE uri_eid_%s' % etype.lower())
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   344
        # Remove relations tables
10879
3193d9ede8dd [dataimport] drop extra indirection through MassiveObjectStore._initialized dict
Julien Cristau <julien.cristau@logilab.fr>
parents: 10878
diff changeset
   345
        for rtype in self._uri_rtypes:
11319
fe90d07f3afa [dataimport] test for a value is in a set and insertion in a set should live together
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11318
diff changeset
   346
            self.sql('DROP TABLE %(r)s_relation_iid_tmp' % {'r': rtype})
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   347
        # Get all the initialized etypes/rtypes
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   348
        if self._dbh.table_exists('cwmassive_initialized'):
11312
3a83759854ee [dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11311
diff changeset
   349
            cu = self.sql('SELECT retype, type FROM cwmassive_initialized')
3a83759854ee [dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11311
diff changeset
   350
            for retype, _type in cu.fetchall():
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   351
                self.logger.info('Cleanup for %s' % retype)
11328
9f2d7da47526 [dataimport] test and fix external source support for the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11326
diff changeset
   352
                if _type == 'etype':
9f2d7da47526 [dataimport] test and fix external source support for the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11326
diff changeset
   353
                    self.sql('ALTER TABLE cw_%s DROP COLUMN extid' % retype)
9f2d7da47526 [dataimport] test and fix external source support for the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11326
diff changeset
   354
                elif _type == 'rtype':
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   355
                    # Cleanup relations tables
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   356
                    self._cleanup_relations(retype)
10878
fda5e42037a9 [dataimport] remove remaining references to dataio from MassiveObjectStore
Julien Cristau <julien.cristau@logilab.fr>
parents: 10877
diff changeset
   357
                self.sql('DELETE FROM cwmassive_initialized WHERE retype = %(e)s',
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   358
                         {'e': retype})
11322
21316020eae3 [dataimport] move cwmassive_constraint temporary table handling to the PGHelper class
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11321
diff changeset
   359
        self._dbh.restore_indexes_and_constraints()
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   360
        # Delete the meta data table
11329
a8cab8fb54ba [dataimport] drop massive store's flush_metadata method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11328
diff changeset
   361
        self.sql('DROP TABLE IF EXISTS cwmassive_initialized')
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   362
        self.commit()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   363
11313
682b15eb2dd2 [dataimport] flake8
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11312
diff changeset
   364
    # FLUSH #################################################################
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   365
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   366
    def on_commit(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   367
        if self.on_commit_callback:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   368
            self.on_commit_callback()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   369
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   370
    def on_rollback(self, exc, etype, data):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   371
        if self.on_rollback_callback:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   372
            self.on_rollback_callback(exc, etype, data)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   373
            self._cnx.rollback()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   374
        else:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   375
            raise exc
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   376
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   377
    def flush_internal_relations(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   378
        """ Flush the relations data
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   379
        """
10856
b839167d99a4 [dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents: 10855
diff changeset
   380
        for rtype, data in self._data_relations.items():
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   381
            if not data:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   382
                # There is no data for these etype for this flush round.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   383
                continue
10871
1d4a94d04ec6 [dataimport] remove replace_sep parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10870
diff changeset
   384
            buf = pgstore._create_copyfrom_buffer(data, ('eid_from', 'eid_to'))
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   385
            if not buf:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   386
                # The buffer is empty. This is probably due to error in _create_copyfrom_buffer
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   387
                raise ValueError
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   388
            cursor = self._cnx.cnxset.cu
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   389
            # Push into the tmp table
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   390
            cursor.copy_from(buf, '%s_relation_tmp' % rtype.lower(),
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   391
                             null='NULL', columns=('eid_from', 'eid_to'))
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   392
            # Clear data cache
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   393
            self._data_relations[rtype] = []
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   394
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   395
    def flush_entities(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   396
        """ Flush the entities data
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   397
        """
10856
b839167d99a4 [dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents: 10855
diff changeset
   398
        for etype, data in self._data_entities.items():
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   399
            if not data:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   400
                # There is no data for these etype for this flush round.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   401
                continue
11305
118d83e65ca8 [dataimport] remove useless assignment in massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11057
diff changeset
   402
            # XXX It may be interresting to directly infer the columns' names from the schema
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   403
            # XXX For now, the _create_copyfrom_buffer does a "row[column]"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   404
            # which can lead to a key error.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   405
            # Thus we should create dictionary with all the keys.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   406
            columns = set()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   407
            for d in data:
11330
6adfa1e75179 [dataimport] no need to call .keys()
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11329
diff changeset
   408
                columns.update(d)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   409
            _data = []
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   410
            _base_data = dict.fromkeys(columns)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   411
            for d in data:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   412
                _d = _base_data.copy()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   413
                _d.update(d)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   414
                _data.append(_d)
10871
1d4a94d04ec6 [dataimport] remove replace_sep parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10870
diff changeset
   415
            buf = pgstore._create_copyfrom_buffer(_data, columns)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   416
            if not buf:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   417
                # The buffer is empty. This is probably due to error in _create_copyfrom_buffer
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   418
                raise ValueError('Error in buffer creation for etype %s' % etype)
11328
9f2d7da47526 [dataimport] test and fix external source support for the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11326
diff changeset
   419
            columns = ['cw_%s' % attr if attr != 'extid' else attr
9f2d7da47526 [dataimport] test and fix external source support for the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11326
diff changeset
   420
                       for attr in columns]
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   421
            cursor = self._cnx.cnxset.cu
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   422
            try:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   423
                cursor.copy_from(buf, 'cw_%s' % etype.lower(), null='NULL', columns=columns)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   424
            except Exception as exc:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   425
                self.on_rollback(exc, etype, data)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   426
            # Clear data cache
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   427
            self._data_entities[etype] = []
11329
a8cab8fb54ba [dataimport] drop massive store's flush_metadata method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11328
diff changeset
   428
            if not self.slave_mode:
a8cab8fb54ba [dataimport] drop massive store's flush_metadata method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11328
diff changeset
   429
                self._insert_etype_metadata(etype)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   430
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   431
    def _cleanup_relations(self, rtype):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   432
        """ Cleanup rtype table """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   433
        # Push into relation table while removing duplicate
11312
3a83759854ee [dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11311
diff changeset
   434
        self.sql('INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT '
3a83759854ee [dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11311
diff changeset
   435
                 'T.eid_from, T.eid_to FROM %(r)s_relation_tmp AS T '
3a83759854ee [dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11311
diff changeset
   436
                 'WHERE NOT EXISTS (SELECT 1 FROM %(r)s_relation AS TT WHERE '
3a83759854ee [dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11311
diff changeset
   437
                 'TT.eid_from=T.eid_from AND TT.eid_to=T.eid_to);' % {'r': rtype})
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   438
        # Drop temporary relation table
11312
3a83759854ee [dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11311
diff changeset
   439
        self.sql('DROP TABLE %(r)s_relation_tmp' % {'r': rtype.lower()})
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   440
11329
a8cab8fb54ba [dataimport] drop massive store's flush_metadata method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11328
diff changeset
   441
    def _insert_etype_metadata(self, etype):
a8cab8fb54ba [dataimport] drop massive store's flush_metadata method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11328
diff changeset
   442
        """Massive insertion of meta data for a given etype, based on SQL statements.
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   443
        """
11326
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   444
        # insert standard metadata relations
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   445
        for rtype, eid in self.metagen.base_etype_rels(etype).items():
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   446
            self._insert_meta_relation(etype, eid, '%s_relation' % rtype)
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   447
        # insert cw_source, is and is_instance_of relations (normally handled by the system source)
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   448
        self._insert_meta_relation(etype, self.metagen.source.eid, 'cw_source_relation')
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   449
        eschema = self.schema[etype]
11325
a29443fbd1f2 [dataimport] rename massive store's metagen_push_relation method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11324
diff changeset
   450
        self._insert_meta_relation(etype, eschema.eid, 'is_relation')
11326
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   451
        for parent_eschema in chain(eschema.ancestors(), [eschema]):
11325
a29443fbd1f2 [dataimport] rename massive store's metagen_push_relation method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11324
diff changeset
   452
            self._insert_meta_relation(etype, parent_eschema.eid, 'is_instance_of_relation')
11326
06eeac9389a3 [dataimport] introduce usage of MetadataGenerator into the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11325
diff changeset
   453
        # finally insert records into the entities table
11312
3a83759854ee [dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11311
diff changeset
   454
        self.sql("INSERT INTO entities (eid, type, asource, extid) "
11328
9f2d7da47526 [dataimport] test and fix external source support for the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11326
diff changeset
   455
                 "SELECT cw_eid, '%s', '%s', extid FROM cw_%s "
11312
3a83759854ee [dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11311
diff changeset
   456
                 "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
11328
9f2d7da47526 [dataimport] test and fix external source support for the massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11326
diff changeset
   457
                 % (etype, self.metagen.source.uri, etype.lower()))
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   458
11325
a29443fbd1f2 [dataimport] rename massive store's metagen_push_relation method
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11324
diff changeset
   459
    def _insert_meta_relation(self, etype, eid_to, rtype):
11312
3a83759854ee [dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11311
diff changeset
   460
        self.sql("INSERT INTO %s (eid_from, eid_to) SELECT cw_eid, %s FROM cw_%s "
3a83759854ee [dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11311
diff changeset
   461
                 "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
3a83759854ee [dataimport] enhance a bit sql queries readability
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11311
diff changeset
   462
                 % (rtype, eid_to, etype.lower()))
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   463
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   464
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   465
def get_size_constraints(schema):
    """Analyze yams ``schema`` and return its size constraints.

    The returned value is a dictionary mapping each entity type to a
    sub-dictionary mapping attribute names -> max size. Only attributes
    holding a ``SizeConstraint`` appear in the sub-dictionary.
    """
    size_constraints = {}
    for eschema in schema.entities():
        # every entity type gets an entry, even without any size constraint
        attr_sizes = size_constraints[eschema.type] = {}
        for rschema, aschema in eschema.attribute_definitions():
            rdef = rschema.rdef(eschema, aschema)
            for constraint in rdef.constraints:
                if not isinstance(constraint, SizeConstraint):
                    continue
                # with several size constraints, the last one wins
                attr_sizes[rschema.type] = constraint.max
    return size_constraints
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   486
11313
682b15eb2dd2 [dataimport] flake8
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11312
diff changeset
   487
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   488
def get_default_values(schema):
    """Analyze the yams ``schema`` and return the default values of its attributes.

    The returned value is a dictionary mapping each entity type to a
    sub-dictionary mapping attribute names -> default values.

    Every entity type yielded by ``schema.entities()`` gets an entry, possibly
    empty; attributes whose default is ``None`` are left out of the
    sub-dictionary.
    """
    default_values = {}
    # iterate on all entity types
    for eschema in schema.entities():
        # register the entity type even if none of its attributes has a default
        default_values[eschema.type] = eschema_defaults = {}
        # for each entity type, iterate on attribute definitions
        for rschema, _ in eschema.attribute_definitions():
            # look the default up once and reuse it, rather than asking the
            # entity schema twice for the same attribute
            default = eschema.default(rschema.type)
            if default is not None:
                eschema_defaults[rschema.type] = default
    return default_values
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   505
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   506
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   507
class PGHelper(object):
    """This class provides some helper methods to manipulate a postgres database metadata (index and
    constraints).

    Dropped indexes and constraints have the SQL needed to recreate them saved in the
    ``cwmassive_constraints`` table, from which `restore_indexes_and_constraints` replays them.
    """

    def __init__(self, cnx):
        """Bind the helper to the connection ``cnx``.

        ``cnx.system_sql`` is kept as ``self.sql`` and used to run every query; the postgres
        schema is read from the 'db-namespace' entry of the system source configuration,
        falling back to 'public' when that entry is missing or empty.
        """
        # function-scope import so that the class does not rely on the module's import block
        import logging
        self.sql = cnx.system_sql
        # `restore_indexes_and_constraints` logs through this attribute, it must always exist
        self.logger = logging.getLogger(__name__)
        # Deals with pg schema, see #3216686
        self.pg_schema = cnx.repo.config.system_source_config.get('db-namespace') or 'public'

    def drop_indexes(self, tablename):
        """Drop the indexes of the given table, storing them in a table for later restore."""
        # Create a table to save the constraints, it allows reloading even after crash
        self.sql('CREATE TABLE IF NOT EXISTS cwmassive_constraints(sql TEXT, insert_order SERIAL)')
        indexes = self.table_indexes(tablename)
        for name, query in indexes.items():
            # save the SQL recreating the index before actually dropping it
            self.sql('INSERT INTO cwmassive_constraints(sql) VALUES (%(sql)s)', {'sql': query})
            self.sql('DROP INDEX %s' % name)

    def drop_constraints(self, tablename):
        """Drop the constraints of the given table, storing them in a table for later restore."""
        self.sql('CREATE TABLE IF NOT EXISTS cwmassive_constraints(sql TEXT, insert_order SERIAL)')
        constraints = self.table_constraints(tablename)
        for name, query in constraints.items():
            # save the SQL recreating the constraint before actually dropping it
            self.sql('INSERT INTO cwmassive_constraints(sql) VALUES (%(sql)s)', {'sql': query})
            self.sql('ALTER TABLE %s DROP CONSTRAINT %s' % (tablename, name))

    def restore_indexes_and_constraints(self):
        """Restore indexes and constraints.

        Saved statements are replayed starting from the most recently inserted one, each row
        being deleted once its statement has been executed; the ``cwmassive_constraints`` table
        is dropped at the end. Do nothing but log a message if that table does not exist.
        """
        if not self.table_exists('cwmassive_constraints'):
            self.logger.info('The table cwmassive_constraints does not exist')
            return
        cu = self.sql('SELECT sql, insert_order FROM cwmassive_constraints '
                      'ORDER BY insert_order DESC')
        for query, order in cu.fetchall():
            self.sql(query)
            self.sql('DELETE FROM cwmassive_constraints WHERE insert_order=%(order)s',
                     {'order': order})
        self.sql('DROP TABLE cwmassive_constraints')

    def table_exists(self, tablename):
        """Return True if the given table already exists in the database."""
        cu = self.sql('SELECT 1 from information_schema.tables '
                      'WHERE table_name=%(t)s AND table_schema=%(s)s',
                      {'t': tablename, 's': self.pg_schema})
        return bool(cu.fetchone())

    def table_indexes_constraints(self, tablename):
        """Return one dictionary with all indexes by name, another with all constraints by name,
        for the given table.

        Indexes whose name is also the name of a constraint are left out of the first dictionary.
        """
        indexes = self.table_indexes(tablename)
        constraints = self.table_constraints(tablename)
        # Remove pkey indexes (automatically created by constraints)
        # Specific cases of primary key, see #3224079
        indexes = {name: query for name, query in indexes.items() if name not in constraints}
        return indexes, constraints

    def table_indexes(self, tablename):
        """Return a dictionary of indexes {index name: index sql}, constraints included."""
        return {name: self._index_sql(name) for name in self._index_names(tablename)}

    def table_constraints(self, tablename):
        """Return a dictionary of constraints {constraint name: constraint sql}.

        The SQL is the full ``ALTER TABLE ... ADD CONSTRAINT ...`` statement recreating the
        constraint.
        """
        return {
            name: 'ALTER TABLE %s ADD CONSTRAINT %s %s' % (tablename, name,
                                                           self._constraint_sql(name))
            for name in self._constraint_names(tablename)
        }

    def _index_names(self, tablename):
        """Return the names of all indexes in the given table (including constraints.)

        Primary key indexes are excluded by the query.
        """
        cu = self.sql("SELECT c.relname FROM pg_catalog.pg_class c "
                      "JOIN pg_catalog.pg_index i ON i.indexrelid = c.oid "
                      "JOIN pg_catalog.pg_class c2 ON i.indrelid = c2.oid "
                      "LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner "
                      "LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace "
                      "WHERE c.relkind IN ('i','') "
                      " AND c2.relname = %(t)s "
                      " AND i.indisprimary = FALSE "
                      " AND n.nspname NOT IN ('pg_catalog', 'pg_toast') "
                      " AND pg_catalog.pg_table_is_visible(c.oid);", {'t': tablename})
        return [name for name, in cu.fetchall()]

    def _constraint_names(self, tablename):
        """Return the names of all constraints in the given table."""
        cu = self.sql("SELECT i.conname FROM pg_catalog.pg_class c "
                      "JOIN pg_catalog.pg_constraint i ON i.conrelid = c.oid "
                      "JOIN pg_catalog.pg_class c2 ON i.conrelid=c2.oid "
                      "LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner "
                      "LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace "
                      "WHERE c2.relname = %(t)s "
                      "AND n.nspname NOT IN ('pg_catalog', 'pg_toast') "
                      "AND pg_catalog.pg_table_is_visible(c.oid)", {'t': tablename})
        return [name for name, in cu.fetchall()]

    def _index_sql(self, name):
        """Return the SQL to be used to recreate the index of the given name.

        The index is searched in the ``self.pg_schema`` namespace.
        """
        return self.sql('SELECT pg_get_indexdef(c.oid) FROM pg_catalog.pg_class c '
                        'LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace '
                        'WHERE c.relname = %(r)s AND n.nspname=%(n)s',
                        {'r': name, 'n': self.pg_schema}).fetchone()[0]

    def _constraint_sql(self, name):
        """Return the SQL to be used to recreate the constraint.

        The constraint is searched by name in the ``self.pg_schema`` namespace.
        """
        # NOTE(review): the lookup does not filter on the table, only the first matching row is
        # used -- confirm constraint names are unique within the namespace
        return self.sql('SELECT pg_get_constraintdef(c.oid) FROM pg_catalog.pg_constraint c '
                        'LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.connamespace '
                        'WHERE c.conname = %(r)s AND n.nspname=%(n)s',
                        {'r': name, 'n': self.pg_schema}).fetchone()[0]