cubicweb/dataimport/massive_store.py
changeset 11778 9847a097266e
parent 11777 5b535fe2f364
child 11780 307d96c0ab5a
equal deleted inserted replaced
11777:5b535fe2f364 11778:9847a097266e
    25 
    25 
    26 from six.moves import range
    26 from six.moves import range
    27 
    27 
    28 from yams.constraints import SizeConstraint
    28 from yams.constraints import SizeConstraint
    29 
    29 
    30 from cubicweb.schema import PURE_VIRTUAL_RTYPES
       
    31 from cubicweb.server.schema2sql import rschema_has_table
       
    32 from cubicweb.dataimport import stores, pgstore
    30 from cubicweb.dataimport import stores, pgstore
    33 
    31 
    34 
    32 
    35 class MassiveObjectStore(stores.RQLObjectStore):
    33 class MassiveObjectStore(stores.RQLObjectStore):
    36     """Store for massive import of data, with delayed insertion of meta data.
    34     """Store for massive import of data, with delayed insertion of meta data.
   108             for eid in range(last_eid - self.eids_seq_range + 1, last_eid + 1):
   106             for eid in range(last_eid - self.eids_seq_range + 1, last_eid + 1):
   109                 yield eid
   107                 yield eid
   110 
   108 
   111     # SQL utilities #########################################################
   109     # SQL utilities #########################################################
   112 
   110 
   113     def _drop_constraints(self):
   111     def _drop_metadata_constraints_if_necessary(self):
   114         """Drop """
   112         """Drop constraints and indexes for the metadata tables if necessary."""
   115         if not self._constraints_dropped:
   113         if not self._constraints_dropped:
   116             # drop constraint and metadata table, they will be recreated when self.finish() is
   114             self._drop_metadata_constraints()
   117             # called
       
   118             self._drop_all_constraints()
       
   119             self._drop_metatables_indexes()
       
   120             self._constraints_dropped = True
   115             self._constraints_dropped = True
   121 
   116 
   122     def _drop_all_constraints(self):
   117     def _drop_metadata_constraints(self):
   123         etypes_tables = ('cw_%s' % eschema.type.lower() for eschema in self.schema.entities()
   118         """Drop constraints and indexes for the metadata tables.
   124                          if not eschema.final)
   119 
   125         rtypes_tables = ('%s_relation' % rschema.type.lower() for rschema in self.schema.relations()
   120         They will be recreated by the `finish` method.
   126                          if rschema_has_table(rschema, skip_relations=PURE_VIRTUAL_RTYPES))
   121         """
   127         for tablename in chain(etypes_tables, rtypes_tables, ('entities',)):
       
   128             self._dbh.drop_constraints(tablename)
       
   129 
       
   130     def _drop_metatables_indexes(self):
       
   131         """ Drop all the constraints for the meta data"""
       
   132         for tablename in ('created_by_relation', 'owned_by_relation',
   122         for tablename in ('created_by_relation', 'owned_by_relation',
   133                           'is_instance_of_relation', 'is_relation',
   123                           'is_instance_of_relation', 'is_relation'):
   134                           'entities'):
   124              self._dbh.drop_constraints(tablename)
   135             self._dbh.drop_indexes(tablename)
   125             self._dbh.drop_indexes(tablename)
       
   126         # don't drop constraints for the entities table, the only one is the primary key's index on
       
   127         # eid and we want to keep it
       
   128         self._dbh.drop_indexes('entities')
   136 
   129 
   137     def restart_eid_sequence(self, start_eid):
   130     def restart_eid_sequence(self, start_eid):
   138         self.sql(self._cnx.repo.system_source.dbhelper.sql_restart_numrange(
   131         self.sql(self._cnx.repo.system_source.dbhelper.sql_restart_numrange(
   139             'entities_id_seq', initial_value=start_eid))
   132             'entities_id_seq', initial_value=start_eid))
   140         self._cnx.commit()
   133         self._cnx.commit()
   145         """Given an entity type, attributes and inlined relations, returns the inserted entity's
   138         """Given an entity type, attributes and inlined relations, returns the inserted entity's
   146         eid.
   139         eid.
   147         """
   140         """
   148         if not self.slave_mode and etype not in self._initialized:
   141         if not self.slave_mode and etype not in self._initialized:
   149             self._initialized.add(etype)
   142             self._initialized.add(etype)
   150             self._drop_constraints()
   143             self._drop_metadata_constraints_if_necessary()
   151             self._dbh.drop_indexes('cw_%s' % etype.lower())
   144             tablename = 'cw_%s' % etype.lower()
       
   145             self._dbh.drop_constraints(tablename)
       
   146             self._dbh.drop_indexes(tablename)
   152             self.sql('CREATE TABLE IF NOT EXISTS cwmassive_initialized'
   147             self.sql('CREATE TABLE IF NOT EXISTS cwmassive_initialized'
   153                      '(retype text, type varchar(128))')
   148                      '(retype text, type varchar(128))')
   154             self.sql("INSERT INTO cwmassive_initialized VALUES (%(e)s, 'etype')", {'e': etype})
   149             self.sql("INSERT INTO cwmassive_initialized VALUES (%(e)s, 'etype')", {'e': etype})
   155         attrs = self.metagen.base_etype_attrs(etype)
   150         attrs = self.metagen.base_etype_attrs(etype)
   156         data = copy(attrs)  # base_etype_attrs is @cached, a copy is necessary
   151         data = copy(attrs)  # base_etype_attrs is @cached, a copy is necessary
   174         Relation must not be inlined.
   169         Relation must not be inlined.
   175         """
   170         """
   176         if not self.slave_mode and rtype not in self._initialized:
   171         if not self.slave_mode and rtype not in self._initialized:
   177             assert not self._cnx.vreg.schema.rschema(rtype).inlined
   172             assert not self._cnx.vreg.schema.rschema(rtype).inlined
   178             self._initialized.add(rtype)
   173             self._initialized.add(rtype)
   179             self._drop_constraints()
   174             self._drop_metadata_constraints_if_necessary()
   180             self._dbh.drop_indexes('%s_relation' % rtype.lower())
   175             tablename = '%s_relation' % rtype.lower()
   181             self.sql('CREATE TABLE %s_relation_tmp (eid_from integer, eid_to integer)'
   176             self._dbh.drop_constraints(tablename)
   182                      % rtype.lower())
   177             self._dbh.drop_indexes(tablename)
       
   178             self.sql('CREATE TABLE %s_tmp (eid_from integer, eid_to integer)'
       
   179                      % tablename)
   183             self.sql('CREATE TABLE IF NOT EXISTS cwmassive_initialized'
   180             self.sql('CREATE TABLE IF NOT EXISTS cwmassive_initialized'
   184                      '(retype text, type varchar(128))')
   181                      '(retype text, type varchar(128))')
   185             self.sql("INSERT INTO cwmassive_initialized VALUES (%(e)s, 'rtype')", {'e': rtype})
   182             self.sql("INSERT INTO cwmassive_initialized VALUES (%(e)s, 'rtype')", {'e': rtype})
   186         self._data_relations[rtype].append({'eid_from': eid_from, 'eid_to': eid_to})
   183         self._data_relations[rtype].append({'eid_from': eid_from, 'eid_to': eid_to})
   187 
   184