dataimport/massive_store.py
author Julien Cristau <julien.cristau@logilab.fr>
Mon, 09 Nov 2015 15:29:07 +0100
changeset 10871 1d4a94d04ec6
parent 10870 9dedf464596b
child 10872 ff4f94cfa2fb
permissions -rw-r--r--
[dataimport] remove replace_sep parameter from massive store It is unused since changeset efbbf1e93a04 "[dataimport] Properly escape strings sent to COPY FROM"
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     1
# coding: utf-8
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     2
# copyright 2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     3
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     4
#
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     5
# This file is part of CubicWeb.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     6
#
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     7
# CubicWeb is free software: you can redistribute it and/or modify it under the
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     8
# terms of the GNU Lesser General Public License as published by the Free
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     9
# Software Foundation, either version 2.1 of the License, or (at your option)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    10
# any later version.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    11
#
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    12
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT ANY
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    13
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    14
# A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    15
# details.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    16
#
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    17
# You should have received a copy of the GNU Lesser General Public License along
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    18
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    19
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    20
import logging
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    21
from datetime import datetime
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    22
from collections import defaultdict
10854
f437787d8849 [dataimport] import StringIO from io
Julien Cristau <julien.cristau@logilab.fr>
parents: 10853
diff changeset
    23
from io import StringIO
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    24
10859
375a8232e61c [dataimport] import range from six.moves
Julien Cristau <julien.cristau@logilab.fr>
parents: 10856
diff changeset
    25
from six.moves import range
375a8232e61c [dataimport] import range from six.moves
Julien Cristau <julien.cristau@logilab.fr>
parents: 10856
diff changeset
    26
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    27
from yams.constraints import SizeConstraint
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    28
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    29
from psycopg2 import ProgrammingError
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    30
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    31
from cubicweb.dataimport import stores, pgstore
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    32
from cubicweb.utils import make_uid
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    33
from cubicweb.server.sqlutils import SQL_PREFIX
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    34
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    35
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    36
class MassiveObjectStore(stores.RQLObjectStore):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    37
    """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    38
    Store for massive import of data, with delayed insertion of meta data.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    39
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    40
    WARNINGS:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    41
   - This store may be only used with PostgreSQL for now, as it relies
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    42
     on the COPY FROM method, and on specific PostgreSQL tables to get all
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    43
     the indexes.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    44
   - This store can only insert relations that are not inlined (i.e.,
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    45
     which do *not* have inlined=True in their definition in the schema).
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    46
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    47
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    48
   It should be used as follows:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    49
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    50
       store = MassiveObjectStore(cnx)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    51
       store.init_rtype_table('Person', 'lives_in', 'Location')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    52
       ...
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    53
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    54
       store.create_entity('Person', subj_iid_attribute=person_iid, ...)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    55
       store.create_entity('Location', obj_iid_attribute=location_iid, ...)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    56
       ...
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    57
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    58
       # subj_iid_attribute and obj_iid_attribute are argument names
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    59
       # chosen by the user (e.g. "cwuri"). These names can be identical.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    60
       # person_iid and location_iid are unique IDs and depend on the data
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    61
       # (e.g URI).
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    62
       store.flush()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    63
       store.relate_by_iid(person_iid, 'lives_in', location_iid)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    64
       # For example:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    65
       store.create_entity('Person',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    66
                           cwuri='http://dbpedia.org/toto',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    67
                           name='Toto')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    68
       store.create_entity('Location',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    69
                           uri='http://geonames.org/11111',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    70
                           name='Somewhere')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    71
       store.flush()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    72
       store.relate_by_iid('http://dbpedia.org/toto',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    73
                       'lives_in',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    74
                       'http://geonames.org/11111')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    75
       # Finally
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    76
       store.flush_meta_data()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    77
       store.convert_relations('Person', 'lives_in', 'Location',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    78
                               'subj_iid_attribute', 'obj_iid_attribute')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    79
       # For the previous example:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    80
       store.convert_relations('Person', 'lives_in', 'Location', 'cwuri', 'uri')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    81
       ...
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    82
       store.cleanup()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    83
    """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    84
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    85
    def __init__(self, cnx, autoflush_metadata=True,
10871
1d4a94d04ec6 [dataimport] remove replace_sep parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10870
diff changeset
    86
                 commit_at_flush=True,
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    87
                 iid_maxsize=1024, uri_param_name='rdf:about',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    88
                 eids_seq_range=10000, eids_seq_start=None,
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    89
                 on_commit_callback=None, on_rollback_callback=None,
10865
2537df9fdd27 [dataimport] drop no more used parameter on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10864
diff changeset
    90
                 slave_mode=False,
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    91
                 source=None):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    92
        """ Create a MassiveObject store, with the following attributes:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    93
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    94
        - cnx: CubicWeb cnx
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    95
        - autoflush_metadata: Boolean.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    96
                              Automatically flush the metadata after
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    97
                              each flush()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    98
        - commit_at_flush: Boolean. Commit after each flush().
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    99
        - eids_seq_range: Int. Range of the eids_seq_range to be fetched each time
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   100
                               by the store (default is 10000).
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   101
                               If None, the sequence eids is attached to each entity tables
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   102
                               (backward compatibility with the 0.2.0).
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   103
        - eids_seq_start: Int. Set the eids sequence value (if None, nothing is done).
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   104
        - iid_maxsize: Int. Max size of the iid, used to create the
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   105
                    iid_eid convertion table.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   106
        - uri_param_name: String. If given, will use this parameter to get cw_uri
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   107
                          for entities.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   108
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   109
        super(MassiveObjectStore, self).__init__(cnx)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   110
        self.logger = logging.getLogger('dataio.relationmixin')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   111
        self._cnx = cnx
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   112
        self.sql = cnx.system_sql
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   113
        self.iid_maxsize = iid_maxsize
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   114
        self.commit_at_flush = commit_at_flush
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   115
        self._data_uri_relations = defaultdict(list)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   116
        self._initialized = {'init_uri_eid': set(),
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   117
                             'uri_eid_inserted': set(),
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   118
                             'uri_rtypes': set(),
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   119
                             'entities': set(),
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   120
                             'rtypes': set(),
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   121
                            }
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   122
        self.sql = self._cnx.system_sql
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   123
        self.logger = logging.getLogger('dataio.massiveimport')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   124
        self.autoflush_metadata = autoflush_metadata
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   125
        self.slave_mode = slave_mode
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   126
        self.size_constraints = get_size_constraints(cnx.vreg.schema)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   127
        self.default_values = get_default_values(cnx.vreg.schema)
10870
9dedf464596b [dataimport] remove pg_schema parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10869
diff changeset
   128
        pg_schema = cnx.repo.config.system_source_config.get('db-namespace', 'public')
9dedf464596b [dataimport] remove pg_schema parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10869
diff changeset
   129
        self._dbh = PGHelper(self._cnx, pg_schema)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   130
        self._data_entities = defaultdict(list)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   131
        self._data_relations = defaultdict(list)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   132
        self._now = datetime.now()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   133
        self._default_cwuri = make_uid('_auto_generated')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   134
        self.uri_param_name = uri_param_name
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   135
        self._count_cwuri = 0
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   136
        self.commit_at_flush = commit_at_flush
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   137
        self.on_commit_callback = on_commit_callback
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   138
        self.on_rollback_callback = on_rollback_callback
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   139
        # Initialized the meta tables of dataio for warm restart
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   140
        self._init_dataio_metatables()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   141
        # Internal markers of initialization
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   142
        self._eids_seq_range = eids_seq_range
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   143
        self._eids_seq_start = eids_seq_start
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   144
        if self._eids_seq_start is not None:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   145
            self._cnx.system_sql(self._cnx.repo.system_source.dbhelper.sql_restart_numrange(
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   146
                'entities_id_seq', initial_value=self._eids_seq_start + 1))
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   147
            cnx.commit()
10855
cd91f46fa633 [dataimport] use next builtin instead of next method on iterators
Julien Cristau <julien.cristau@logilab.fr>
parents: 10854
diff changeset
   148
        self.get_next_eid = lambda g=self._get_eid_gen(): next(g)
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   149
        # recreate then when self.finish() is called
10869
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   150
        if not self.slave_mode:
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   151
            self._drop_metatables_constraints()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   152
        if source is None:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   153
            source = cnx.repo.system_source
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   154
        self.source = source
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   155
        self._etype_eid_idx = dict(cnx.execute('Any XN,X WHERE X is CWEType, X name XN'))
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   156
        cnx.read_security = False
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   157
        cnx.write_security = False
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   158
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   159
    ### INIT FUNCTIONS ########################################################
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   160
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   161
    def init_rtype_table(self, etype_from, rtype, etype_to):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   162
        """ Build temporary table a for standard rtype """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   163
        # Create an uri_eid table for each etype for a better
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   164
        # control of which etype is concerns for a particular
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   165
        # possibly multivalued relation.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   166
        for etype in (etype_from, etype_to):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   167
            if etype and etype not in self._initialized['init_uri_eid']:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   168
                self._init_uri_eid_table(etype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   169
        if rtype not in self._initialized['uri_rtypes']:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   170
            # Create the temporary tables
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   171
            if not self._cnx.repo.schema.rschema(rtype).inlined:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   172
                try:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   173
                    sql = 'CREATE TABLE %(r)s_relation_iid_tmp (uri_from character ' \
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   174
                          'varying(%(s)s), uri_to character varying(%(s)s))'
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   175
                    self.sql(sql % {'r': rtype, 's': self.iid_maxsize})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   176
                except ProgrammingError:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   177
                    # XXX Already exist (probably due to multiple import)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   178
                    pass
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   179
            else:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   180
                self.logger.warning("inlined relation %s: cannot insert it", rtype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   181
            #Add it to the initialized set
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   182
            self._initialized['uri_rtypes'].add(rtype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   183
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   184
    def _init_uri_eid_table(self, etype):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   185
        """ Build a temporary table for id/eid convertion
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   186
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   187
        try:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   188
            sql = "CREATE TABLE uri_eid_%(e)s (uri character varying(%(size)s), eid integer)"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   189
            self.sql(sql % {'e': etype.lower(), 'size': self.iid_maxsize,})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   190
        except ProgrammingError:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   191
            # XXX Already exist (probably due to multiple import)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   192
            pass
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   193
        # Add it to the initialized set
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   194
        self._initialized['init_uri_eid'].add(etype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   195
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   196
    def _init_dataio_metatables(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   197
        """ Initialized the meta tables of dataio for warm restart
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   198
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   199
        # Check if dataio tables are not already created (i.e. a restart)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   200
        self._initialized_table_created = self._dbh.table_exists('dataio_initialized')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   201
        self._constraint_table_created = self._dbh.table_exists('dataio_constraints')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   202
        self._metadata_table_created = self._dbh.table_exists('dataio_metadata')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   203
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   204
    ### RELATE FUNCTION #######################################################
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   205
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   206
    def relate_by_iid(self, iid_from, rtype, iid_to):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   207
        """Add new relation based on the internal id (iid)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   208
        of the entities (not the eid)"""
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   209
        # Push data
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   210
        if isinstance(iid_from, unicode):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   211
            iid_from = iid_from.encode('utf-8')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   212
        if isinstance(iid_to, unicode):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   213
            iid_to = iid_to.encode('utf-8')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   214
        self._data_uri_relations[rtype].append({'uri_from': iid_from, 'uri_to': iid_to})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   215
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   216
    ### FLUSH FUNCTIONS #######################################################
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   217
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   218
    def flush_relations(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   219
        """ Flush the relations data
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   220
        """
10856
b839167d99a4 [dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents: 10855
diff changeset
   221
        for rtype, data in self._data_uri_relations.items():
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   222
            if not data:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   223
                self.logger.info('No data for rtype %s', rtype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   224
            buf = StringIO('\n'.join(['%(uri_from)s\t%(uri_to)s' % d for d in data]))
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   225
            if not buf:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   226
                self.logger.info('Empty Buffer for rtype %s', rtype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   227
                continue
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   228
            cursor = self._cnx.cnxset.cu
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   229
            if not self._cnx.repo.schema.rschema(rtype).inlined:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   230
                cursor.copy_from(buf, '%s_relation_iid_tmp' % rtype.lower(),
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   231
                                 null='NULL', columns=('uri_from', 'uri_to'))
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   232
            else:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   233
                self.logger.warning("inlined relation %s: cannot insert it", rtype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   234
            buf.close()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   235
            # Clear data cache
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   236
            self._data_uri_relations[rtype] = []
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   237
            # Commit if asked
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   238
            if self.commit_at_flush:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   239
                self.commit()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   240
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   241
    def fill_uri_eid_table(self, etype, uri_label):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   242
        """ Fill the uri_eid table
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   243
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   244
        self.logger.info('Fill uri_eid for etype %s', etype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   245
        sql = 'INSERT INTO uri_eid_%(e)s SELECT cw_%(l)s, cw_eid FROM cw_%(e)s'
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   246
        self.sql(sql % {'l': uri_label, 'e': etype.lower()})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   247
        # Add indexes
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   248
        self.sql('CREATE INDEX uri_eid_%(e)s_idx ON uri_eid_%(e)s' '(uri)' % {'e': etype.lower()})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   249
        # Set the etype as converted
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   250
        self._initialized['uri_eid_inserted'].add(etype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   251
        self.commit()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   252
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   253
    def convert_relations(self, etype_from, rtype, etype_to,
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   254
                          uri_label_from='cwuri', uri_label_to='cwuri'):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   255
        """ Flush the converted relations
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   256
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   257
        # Always flush relations to be sure
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   258
        self.logger.info('Convert relations %s %s %s', etype_from, rtype, etype_to)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   259
        self.flush_relations()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   260
        if uri_label_from and etype_from not in self._initialized['uri_eid_inserted']:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   261
            self.fill_uri_eid_table(etype_from, uri_label_from)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   262
        if uri_label_to and etype_to not in self._initialized['uri_eid_inserted']:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   263
            self.fill_uri_eid_table(etype_to, uri_label_to)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   264
        if self._cnx.repo.schema.rschema(rtype).inlined:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   265
            self.logger.warning("Can't insert inlined relation %s", rtype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   266
            return
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   267
        if uri_label_from and uri_label_to:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   268
            sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT O1.eid, O2.eid
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   269
            FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(ef)s as O1, uri_eid_%(et)s as O2
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   270
            WHERE O1.uri=T.uri_from AND O2.uri=T.uri_to AND NOT EXISTS (
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   271
            SELECT 1 FROM %(r)s_relation AS TT WHERE TT.eid_from=O1.eid AND TT.eid_to=O2.eid);
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   272
            '''
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   273
        elif uri_label_to:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   274
            sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   275
            CAST(T.uri_from AS INTEGER), O1.eid
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   276
            FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(et)s as O1
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   277
            WHERE O1.uri=T.uri_to AND NOT EXISTS (
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   278
            SELECT 1 FROM %(r)s_relation AS TT WHERE
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   279
            TT.eid_from=CAST(T.uri_from AS INTEGER) AND TT.eid_to=O1.eid);
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   280
            '''
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   281
        elif uri_label_from:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   282
            sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT O1.eid, T.uri_to
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   283
            O1.eid, CAST(T.uri_to AS INTEGER)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   284
            FROM %(r)s_relation_iid_tmp AS T, uri_eid_%(ef)s as O1
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   285
            WHERE O1.uri=T.uri_from AND NOT EXISTS (
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   286
            SELECT 1 FROM %(r)s_relation AS TT WHERE
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   287
            TT.eid_from=O1.eid AND TT.eid_to=CAST(T.uri_to AS INTEGER));
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   288
            '''
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   289
        try:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   290
            self.sql(sql % {'r': rtype.lower(),
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   291
                            'et': etype_to.lower() if etype_to else u'',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   292
                            'ef': etype_from.lower() if etype_from else u''})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   293
        except Exception as ex:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   294
            self.logger.error("Can't insert relation %s: %s", rtype, ex)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   295
        self.commit()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   296
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   297
    ### SQL UTILITIES #########################################################
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   298
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   299
    def drop_and_store_indexes_constraints(self, tablename):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   300
        # Drop indexes and constraints
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   301
        if not self._constraint_table_created:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   302
            # Create a table to save the constraints
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   303
            # Allow reload even after crash
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   304
            sql = "CREATE TABLE dataio_constraints (origtable text, query text, type varchar(256))"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   305
            self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   306
            self._constraint_table_created = True
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   307
        self._drop_table_constraints_indexes(tablename)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   308
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   309
    def _drop_table_constraints_indexes(self, tablename):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   310
        """ Drop and store table constraints and indexes """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   311
        indexes, constraints = self._dbh.application_indexes_constraints(tablename)
10856
b839167d99a4 [dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents: 10855
diff changeset
   312
        for name, query in constraints.items():
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   313
            sql = 'INSERT INTO dataio_constraints VALUES (%(e)s, %(c)s, %(t)s)'
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   314
            self.sql(sql, {'e': tablename, 'c': query, 't': 'constraint'})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   315
            sql = 'ALTER TABLE %s DROP CONSTRAINT %s CASCADE' % (tablename, name)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   316
            self.sql(sql)
10856
b839167d99a4 [dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents: 10855
diff changeset
   317
        for name, query in indexes.items():
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   318
            sql = 'INSERT INTO dataio_constraints VALUES (%(e)s, %(c)s, %(t)s)'
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   319
            self.sql(sql, {'e': tablename, 'c': query, 't': 'index'})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   320
            sql = 'DROP INDEX %s' % name
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   321
            self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   322
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   323
    def reapply_constraint_index(self, tablename):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   324
        if not self._dbh.table_exists('dataio_constraints'):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   325
            self.logger.info('The table dataio_constraints does not exist '
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   326
                             '(keep_index option should be True)')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   327
            return
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   328
        sql = 'SELECT query FROM dataio_constraints WHERE origtable = %(e)s'
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   329
        crs = self.sql(sql, {'e': tablename})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   330
        for query, in crs.fetchall():
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   331
            self.sql(query)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   332
            self.sql('DELETE FROM dataio_constraints WHERE origtable = %(e)s '
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   333
                     'AND query = %(q)s', {'e': tablename, 'q': query})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   334
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   335
    def _drop_metatables_constraints(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   336
        """ Drop all the constraints for the meta data"""
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   337
        for tablename in ('created_by_relation', 'owned_by_relation',
10861
6205b89c3af6 [massive store] identity_relation should not be considered, is_relation seems missing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10859
diff changeset
   338
                          'is_instance_of_relation', 'is_relation',
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   339
                          'entities'):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   340
            self.drop_and_store_indexes_constraints(tablename)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   341
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   342
    def _create_metatables_constraints(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   343
        """ Create all the constraints for the meta data"""
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   344
        for tablename in ('entities',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   345
                          'created_by_relation', 'owned_by_relation',
10861
6205b89c3af6 [massive store] identity_relation should not be considered, is_relation seems missing
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10859
diff changeset
   346
                          'is_instance_of_relation', 'is_relation'):
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   347
            # Indexes and constraints
10869
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   348
            self.reapply_constraint_index(tablename)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   349
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   350
    def init_relation_table(self, rtype):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   351
        """ Get and remove all indexes for performance sake """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   352
        # Create temporary table
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   353
        if not self.slave_mode and rtype not in self._initialized['rtypes']:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   354
            sql = "CREATE TABLE %s_relation_tmp (eid_from integer, eid_to integer)" % rtype.lower()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   355
            self.sql(sql)
10869
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   356
            # Drop indexes and constraints
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   357
            tablename = '%s_relation' % rtype.lower()
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   358
            self.drop_and_store_indexes_constraints(tablename)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   359
            # Push the etype in the initialized table for easier restart
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   360
            self.init_create_initialized_table()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   361
            sql = 'INSERT INTO dataio_initialized VALUES (%(e)s, %(t)s)'
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   362
            self.sql(sql, {'e': rtype, 't': 'rtype'})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   363
            # Mark rtype as "initialized" for faster check
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   364
            self._initialized['rtypes'].add(rtype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   365
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   366
    def init_create_initialized_table(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   367
        """ Create the dataio initialized table
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   368
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   369
        if not self._initialized_table_created:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   370
            sql = "CREATE TABLE dataio_initialized (retype text, type varchar(128))"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   371
            self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   372
            self._initialized_table_created = True
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   373
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   374
    def init_etype_table(self, etype):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   375
        """ Add eid sequence to a particular etype table and
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   376
        remove all indexes for performance sake """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   377
        if etype not in self._initialized['entities']:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   378
            # Only for non-initialized etype and not slave mode store
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   379
            if not self.slave_mode:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   380
                if self._eids_seq_range is None:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   381
                    # Eids are directly set by the entities_id_seq.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   382
                    # We attach this sequence to all the created etypes.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   383
                    sql = ("ALTER TABLE cw_%s ALTER COLUMN cw_eid "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   384
                           "SET DEFAULT nextval('entities_id_seq')" % etype.lower())
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   385
                    self.sql(sql)
10869
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   386
                # Drop indexes and constraints
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   387
                tablename = 'cw_%s' % etype.lower()
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   388
                self.drop_and_store_indexes_constraints(tablename)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   389
                # Push the etype in the initialized table for easier restart
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   390
                self.init_create_initialized_table()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   391
                sql = 'INSERT INTO dataio_initialized VALUES (%(e)s, %(t)s)'
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   392
                self.sql(sql, {'e': etype, 't': 'etype'})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   393
            # Mark etype as "initialized" for faster check
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   394
            self._initialized['entities'].add(etype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   395
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   396
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   397
    ### ENTITIES CREATION #####################################################
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   398
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   399
    def _get_eid_gen(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   400
        """ Function getting the next eid. This is done by preselecting
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   401
        a given number of eids from the 'entities_id_seq', and then
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   402
        storing them"""
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   403
        while True:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   404
            last_eid = self._cnx.repo.system_source.create_eid(self._cnx, self._eids_seq_range)
10859
375a8232e61c [dataimport] import range from six.moves
Julien Cristau <julien.cristau@logilab.fr>
parents: 10856
diff changeset
   405
            for eid in range(last_eid - self._eids_seq_range + 1, last_eid + 1):
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   406
                yield eid
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   407
10864
b7f4acf0473b [dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10863
diff changeset
   408
    def _apply_size_constraints(self, etype, kwargs):
b7f4acf0473b [dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10863
diff changeset
   409
        """Apply the size constraints for a given etype, attribute and value."""
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   410
        size_constraints = self.size_constraints[etype]
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   411
        for attr, value in kwargs.items():
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   412
            if value:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   413
                maxsize = size_constraints.get(attr)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   414
                if maxsize is not None and len(value) > maxsize:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   415
                    kwargs[attr] = value[:maxsize-4] + '...'
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   416
10864
b7f4acf0473b [dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10863
diff changeset
   417
    def _apply_default_values(self, etype, kwargs):
b7f4acf0473b [dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10863
diff changeset
   418
        """Apply the default values for a given etype, attribute and value."""
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   419
        default_values = self.default_values[etype]
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   420
        missing_keys = set(default_values) - set(kwargs)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   421
        kwargs.update((key, default_values[key]) for key in missing_keys)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   422
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   423
    # store api ################################################################
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   424
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   425
    def prepare_insert_entity(self, etype, **kwargs):
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   426
        """Given an entity type, attributes and inlined relations, returns the inserted entity's
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   427
        eid.
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   428
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   429
        # Init the table if necessary
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   430
        self.init_etype_table(etype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   431
        # Add meta data if not given
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   432
        if 'modification_date' not in kwargs:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   433
            kwargs['modification_date'] = self._now
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   434
        if 'creation_date' not in kwargs:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   435
            kwargs['creation_date'] = self._now
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   436
        if 'cwuri' not in kwargs:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   437
            if self.uri_param_name and self.uri_param_name in kwargs:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   438
                kwargs['cwuri'] = kwargs[self.uri_param_name]
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   439
            else:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   440
                kwargs['cwuri'] = self._default_cwuri + str(self._count_cwuri)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   441
                self._count_cwuri += 1
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   442
        if 'eid' not in kwargs and self._eids_seq_range is not None:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   443
            # If eid is not given and the eids sequence is set,
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   444
            # use the value from the sequence
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   445
            kwargs['eid'] = self.get_next_eid()
10864
b7f4acf0473b [dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10863
diff changeset
   446
        self._apply_size_constraints(etype, kwargs)
b7f4acf0473b [dataimport] methods that modify in-place shouldn't return value
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10863
diff changeset
   447
        self._apply_default_values(etype, kwargs)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   448
        self._data_entities[etype].append(kwargs)
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   449
        return kwargs.get('eid')
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   450
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   451
    def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   452
        """Insert into the database a  relation ``rtype`` between entities with eids ``eid_from``
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   453
        and ``eid_to``.
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   454
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   455
        # Init the table if necessary
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   456
        self.init_relation_table(rtype)
10863
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   457
        self._data_relations[rtype].append({'eid_from': eid_from, 'eid_to': eid_to})
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   458
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   459
    def flush(self):
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   460
        """Flush the data"""
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   461
        self.flush_entities()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   462
        self.flush_internal_relations()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   463
        self.flush_relations()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   464
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   465
    def commit(self):
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   466
        """Commit the database transaction."""
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   467
        self.on_commit()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   468
        super(MassiveObjectStore, self).commit()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   469
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   470
    def finish(self):
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   471
        """Remove temporary tables and columns."""
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   472
        self.logger.info("Start cleaning")
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   473
        if self.slave_mode:
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   474
            raise RuntimeError('Store cleanup is not allowed in slave mode')
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   475
        self.logger.info("Start cleaning")
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   476
        # Cleanup relations tables
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   477
        for etype in self._initialized['init_uri_eid']:
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   478
            self.sql('DROP TABLE uri_eid_%s' % etype.lower())
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   479
        # Remove relations tables
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   480
        for rtype in self._initialized['uri_rtypes']:
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   481
            if not self._cnx.repo.schema.rschema(rtype).inlined:
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   482
                self.sql('DROP TABLE %(r)s_relation_iid_tmp' % {'r': rtype})
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   483
            else:
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   484
                self.logger.warning("inlined relation %s: no cleanup to be done for it" % rtype)
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   485
        self.commit()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   486
        # Get all the initialized etypes/rtypes
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   487
        if self._dbh.table_exists('dataio_initialized'):
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   488
            crs = self.sql('SELECT retype, type FROM dataio_initialized')
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   489
            for retype, _type in crs.fetchall():
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   490
                self.logger.info('Cleanup for %s' % retype)
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   491
                if _type == 'etype':
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   492
                    # Cleanup entities tables - Recreate indexes
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   493
                    self._cleanup_entities(retype)
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   494
                elif _type == 'rtype':
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   495
                    # Cleanup relations tables
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   496
                    self._cleanup_relations(retype)
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   497
                self.sql('DELETE FROM dataio_initialized WHERE retype = %(e)s',
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   498
                         {'e': retype})
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   499
        # Create meta constraints (entities, is_instance_of, ...)
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   500
        self._create_metatables_constraints()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   501
        self.commit()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   502
        # Delete the meta data table
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   503
        for table_name in ('dataio_initialized', 'dataio_constraints', 'dataio_metadata'):
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   504
            if self._dbh.table_exists(table_name):
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   505
                self.sql('DROP TABLE %s' % table_name)
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   506
        self.commit()
8e1f6de61300 [dataimport] implement new store API on massive store
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10861
diff changeset
   507
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   508
    ### FLUSH #################################################################
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   509
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   510
    def on_commit(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   511
        if self.on_commit_callback:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   512
            self.on_commit_callback()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   513
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   514
    def on_rollback(self, exc, etype, data):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   515
        if self.on_rollback_callback:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   516
            self.on_rollback_callback(exc, etype, data)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   517
            self._cnx.rollback()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   518
        else:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   519
            raise exc
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   520
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   521
    def flush_internal_relations(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   522
        """ Flush the relations data
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   523
        """
10856
b839167d99a4 [dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents: 10855
diff changeset
   524
        for rtype, data in self._data_relations.items():
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   525
            if not data:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   526
                # There is no data for these etype for this flush round.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   527
                continue
10871
1d4a94d04ec6 [dataimport] remove replace_sep parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10870
diff changeset
   528
            buf = pgstore._create_copyfrom_buffer(data, ('eid_from', 'eid_to'))
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   529
            if not buf:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   530
                # The buffer is empty. This is probably due to error in _create_copyfrom_buffer
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   531
                raise ValueError
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   532
            cursor = self._cnx.cnxset.cu
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   533
            # Push into the tmp table
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   534
            cursor.copy_from(buf, '%s_relation_tmp' % rtype.lower(),
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   535
                             null='NULL', columns=('eid_from', 'eid_to'))
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   536
            # Clear data cache
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   537
            self._data_relations[rtype] = []
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   538
            # Commit if asked
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   539
            if self.commit_at_flush:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   540
                self.commit()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   541
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   542
    def flush_entities(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   543
        """ Flush the entities data
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   544
        """
10856
b839167d99a4 [dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents: 10855
diff changeset
   545
        for etype, data in self._data_entities.items():
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   546
            if not data:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   547
                # There is no data for these etype for this flush round.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   548
                continue
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   549
            # XXX It may be interresting to directly infer the columns'
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   550
            # names from the schema instead of using .keys()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   551
            columns = data[0].keys()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   552
            # XXX For now, the _create_copyfrom_buffer does a "row[column]"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   553
            # which can lead to a key error.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   554
            # Thus we should create dictionary with all the keys.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   555
            columns = set()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   556
            for d in data:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   557
                columns.update(d.keys())
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   558
            _data = []
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   559
            _base_data = dict.fromkeys(columns)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   560
            for d in data:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   561
                _d = _base_data.copy()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   562
                _d.update(d)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   563
                _data.append(_d)
10871
1d4a94d04ec6 [dataimport] remove replace_sep parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10870
diff changeset
   564
            buf = pgstore._create_copyfrom_buffer(_data, columns)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   565
            if not buf:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   566
                # The buffer is empty. This is probably due to error in _create_copyfrom_buffer
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   567
                raise ValueError('Error in buffer creation for etype %s' % etype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   568
            columns = ['cw_%s' % attr for attr in columns]
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   569
            cursor = self._cnx.cnxset.cu
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   570
            try:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   571
                cursor.copy_from(buf, 'cw_%s' % etype.lower(), null='NULL', columns=columns)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   572
            except Exception as exc:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   573
                self.on_rollback(exc, etype, data)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   574
            # Clear data cache
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   575
            self._data_entities[etype] = []
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   576
        if self.autoflush_metadata:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   577
            self.flush_meta_data()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   578
        # Commit if asked
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   579
        if self.commit_at_flush:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   580
            self.commit()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   581
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   582
    def flush_meta_data(self):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   583
        """ Flush the meta data (entities table, is_instance table, ...)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   584
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   585
        if self.slave_mode:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   586
            raise RuntimeError('Flushing meta data is not allow in slave mode')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   587
        if not self._dbh.table_exists('dataio_initialized'):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   588
            self.logger.info('No information available for initialized etypes/rtypes')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   589
            return
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   590
        if not self._metadata_table_created:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   591
            # Keep the correctly flush meta data in database
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   592
            sql = "CREATE TABLE dataio_metadata (etype text)"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   593
            self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   594
            self._metadata_table_created = True
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   595
        crs = self.sql('SELECT etype FROM dataio_metadata')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   596
        already_flushed = set(e for e, in crs.fetchall())
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   597
        crs = self.sql('SELECT retype FROM dataio_initialized WHERE type = %(t)s',
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   598
                       {'t': 'etype'})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   599
        all_etypes = set(e for e, in crs.fetchall())
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   600
        for etype in all_etypes:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   601
            if etype not in already_flushed:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   602
                # Deals with meta data
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   603
                self.logger.info('Flushing meta data for %s' % etype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   604
                self.insert_massive_meta_data(etype)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   605
                sql = 'INSERT INTO dataio_metadata VALUES (%(e)s)'
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   606
                self.sql(sql, {'e': etype})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   607
        # Final commit
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   608
        self.commit()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   609
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   610
    def _cleanup_entities(self, etype):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   611
        """ Cleanup etype table """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   612
        if self._eids_seq_range is None:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   613
            # Remove DEFAULT eids sequence if added
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   614
            sql = 'ALTER TABLE cw_%s ALTER COLUMN cw_eid DROP DEFAULT;' % etype.lower()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   615
            self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   616
        # Create indexes and constraints
10869
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   617
        tablename = SQL_PREFIX + etype.lower()
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   618
        self.reapply_constraint_index(tablename)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   619
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   620
    def _cleanup_relations(self, rtype):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   621
        """ Cleanup rtype table """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   622
        # Push into relation table while removing duplicate
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   623
        sql = '''INSERT INTO %(r)s_relation (eid_from, eid_to) SELECT DISTINCT
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   624
                 T.eid_from, T.eid_to FROM %(r)s_relation_tmp AS T
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   625
                 WHERE NOT EXISTS (SELECT 1 FROM %(r)s_relation AS TT WHERE
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   626
                 TT.eid_from=T.eid_from AND TT.eid_to=T.eid_to);''' % {'r': rtype}
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   627
        self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   628
        # Drop temporary relation table
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   629
        sql = ('DROP TABLE %(r)s_relation_tmp' % {'r': rtype.lower()})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   630
        self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   631
        # Create indexes and constraints
10869
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   632
        tablename = '%s_relation' % rtype.lower()
575982c948a9 [dataimport] remove drop_index parameter from massive store
Julien Cristau <julien.cristau@logilab.fr>
parents: 10867
diff changeset
   633
        self.reapply_constraint_index(tablename)
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   634
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   635
    def insert_massive_meta_data(self, etype):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   636
        """ Massive insertion of meta data for a given etype, based on SQL statements.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   637
        """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   638
        # Push data - Use coalesce to avoid NULL (and get 0), if there is no
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   639
        # entities of this type in the entities table.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   640
        # Meta data relations
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   641
        self.metagen_push_relation(etype, self._cnx.user.eid, 'created_by_relation')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   642
        self.metagen_push_relation(etype, self._cnx.user.eid, 'owned_by_relation')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   643
        self.metagen_push_relation(etype, self.source.eid, 'cw_source_relation')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   644
        self.metagen_push_relation(etype, self._etype_eid_idx[etype], 'is_relation')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   645
        self.metagen_push_relation(etype, self._etype_eid_idx[etype], 'is_instance_of_relation')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   646
        sql = ("INSERT INTO entities (eid, type, asource, extid) "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   647
               "SELECT cw_eid, '%s', 'system', NULL FROM cw_%s "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   648
               "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   649
               % (etype, etype.lower()))
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   650
        self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   651
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   652
    def metagen_push_relation(self, etype, eid_to, rtype):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   653
        sql = ("INSERT INTO %s (eid_from, eid_to) SELECT cw_eid, %s FROM cw_%s "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   654
               "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   655
               % (rtype, eid_to, etype.lower()))
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   656
        self.sql(sql)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   657
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   658
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   659
### CONSTRAINTS MANAGEMENT FUNCTIONS  ##########################################
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   660
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   661
def get_size_constraints(schema):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   662
    """analyzes yams ``schema`` and returns the list of size constraints.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   663
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   664
    The returned value is a dictionary mapping entity types to a
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   665
    sub-dictionnaries mapping attribute names -> max size.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   666
    """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   667
    size_constraints = {}
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   668
    # iterates on all entity types
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   669
    for eschema in schema.entities():
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   670
        # for each entity type, iterates on attribute definitions
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   671
        size_constraints[eschema.type] = eschema_constraints = {}
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   672
        for rschema, aschema in eschema.attribute_definitions():
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   673
            # for each attribute, if a size constraint is found,
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   674
            # append it to the size constraint list
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   675
            maxsize = None
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   676
            rdef = rschema.rdef(eschema, aschema)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   677
            for constraint in rdef.constraints:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   678
                if isinstance(constraint, SizeConstraint):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   679
                    maxsize = constraint.max
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   680
                    eschema_constraints[rschema.type] = maxsize
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   681
    return size_constraints
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   682
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   683
def get_default_values(schema):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   684
    """analyzes yams ``schema`` and returns the list of default values.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   685
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   686
    The returned value is a dictionary mapping entity types to a
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   687
    sub-dictionnaries mapping attribute names -> default values.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   688
    """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   689
    default_values = {}
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   690
    # iterates on all entity types
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   691
    for eschema in schema.entities():
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   692
        # for each entity type, iterates on attribute definitions
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   693
        default_values[eschema.type] = eschema_constraints = {}
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   694
        for rschema, _ in eschema.attribute_definitions():
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   695
            # for each attribute, if a size constraint is found,
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   696
            # append it to the size constraint list
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   697
            if eschema.default(rschema.type) is not None:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   698
                eschema_constraints[rschema.type] = eschema.default(rschema.type)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   699
    return default_values
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   700
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   701
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   702
class PGHelper(object):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   703
    def __init__(self, cnx, pg_schema='public'):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   704
        self.cnx = cnx
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   705
        # Deals with pg schema, see #3216686
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   706
        self.pg_schema = pg_schema
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   707
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   708
    def application_indexes_constraints(self, tablename):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   709
        """ Get all the indexes/constraints for a given tablename """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   710
        indexes = self.application_indexes(tablename)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   711
        constraints = self.application_constraints(tablename)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   712
        _indexes = {}
10856
b839167d99a4 [dataimport] dict.iteritems() → dict.items()
Julien Cristau <julien.cristau@logilab.fr>
parents: 10855
diff changeset
   713
        for name, query in indexes.items():
10853
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   714
            # Remove pkey indexes (automatically created by constraints)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   715
            # Specific cases of primary key, see #3224079
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   716
            if name not in constraints:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   717
                _indexes[name] = query
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   718
        return _indexes, constraints
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   719
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   720
    def table_exists(self, table_name):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   721
        sql = "SELECT * from information_schema.tables WHERE table_name=%(t)s AND table_schema=%(s)s"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   722
        crs = self.cnx.system_sql(sql, {'t': table_name, 's': self.pg_schema})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   723
        res = crs.fetchall()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   724
        if res:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   725
            return True
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   726
        return False
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   727
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   728
    # def check_if_primary_key_exists_for_table(self, table_name):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   729
    #     sql = ("SELECT constraint_name FROM information_schema.table_constraints "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   730
    #            "WHERE constraint_type = 'PRIMARY KEY' AND table_name=%(t)s AND table_schema=%(s)s")
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   731
    #     crs = self.cnx.system_sql(sql, {'t': table_name, 's': self.pg_schema})
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   732
    #     res = crs.fetchall()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   733
    #     if res:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   734
    #         return True
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   735
    #     return False
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   736
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   737
    def index_query(self, name):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   738
        """Get the request to be used to recreate the index"""
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   739
        return self.cnx.system_sql("SELECT pg_get_indexdef(c.oid) "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   740
                                   "from pg_catalog.pg_class c "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   741
                                   "LEFT JOIN pg_catalog.pg_namespace n "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   742
                                   "ON n.oid = c.relnamespace "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   743
                                   "WHERE c.relname = %(r)s AND n.nspname=%(n)s",
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   744
                                   {'r': name, 'n': self.pg_schema}).fetchone()[0]
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   745
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   746
    def constraint_query(self, name):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   747
        """Get the request to be used to recreate the constraint"""
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   748
        return self.cnx.system_sql("SELECT pg_get_constraintdef(c.oid) "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   749
                                   "from pg_catalog.pg_constraint c "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   750
                                   "LEFT JOIN pg_catalog.pg_namespace n "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   751
                                   "ON n.oid = c.connamespace "
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   752
                                   "WHERE c.conname = %(r)s AND n.nspname=%(n)s",
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   753
                                   {'r': name, 'n': self.pg_schema}).fetchone()[0]
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   754
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   755
    def application_indexes(self, tablename):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   756
        """ Iterate over all the indexes """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   757
        # This SQL query (cf http://www.postgresql.org/message-id/432F450F.4080700@squiz.net)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   758
        # aims at getting all the indexes for each table.
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   759
        sql = '''SELECT c.relname as "Name"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   760
        FROM pg_catalog.pg_class c
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   761
        JOIN pg_catalog.pg_index i ON i.indexrelid = c.oid
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   762
        JOIN pg_catalog.pg_class c2 ON i.indrelid = c2.oid
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   763
        LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   764
        LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   765
        WHERE c.relkind IN ('i','')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   766
        AND c2.relname = '%s'
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   767
        AND n.nspname NOT IN ('pg_catalog', 'pg_toast')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   768
        AND pg_catalog.pg_table_is_visible(c.oid);''' % tablename
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   769
        indexes_list = self.cnx.system_sql(sql).fetchall()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   770
        indexes = {}
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   771
        for name, in indexes_list:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   772
            indexes[name] = self.index_query(name)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   773
        return indexes
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   774
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   775
    def application_constraints(self, tablename):
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   776
        """ Iterate over all the constraints """
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   777
        sql = '''SELECT i.conname as "Name"
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   778
                 FROM pg_catalog.pg_class c JOIN pg_catalog.pg_constraint i
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   779
                 ON i.conrelid = c.oid JOIN pg_catalog.pg_class c2 ON i.conrelid=c2.oid
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   780
                 LEFT JOIN pg_catalog.pg_user u ON u.usesysid = c.relowner
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   781
                 LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   782
                 WHERE c2.relname = '%s' AND n.nspname NOT IN ('pg_catalog', 'pg_toast')
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   783
                 AND pg_catalog.pg_table_is_visible(c.oid);''' % tablename
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   784
        indexes_list = self.cnx.system_sql(sql).fetchall()
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   785
        constraints = {}
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   786
        for name, in indexes_list:
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   787
            query = self.constraint_query(name)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   788
            constraints[name] = 'ALTER TABLE %s ADD CONSTRAINT %s %s' % (tablename, name, query)
de741492538d [dataimport] backport massive store from dataio cube
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   789
        return constraints