diff -r fda5e42037a9 -r 3193d9ede8dd dataimport/massive_store.py --- a/dataimport/massive_store.py Mon Nov 09 16:04:13 2015 +0100 +++ b/dataimport/massive_store.py Mon Nov 09 16:21:29 2015 +0100 @@ -101,12 +101,18 @@ self._cnx = cnx self.sql = cnx.system_sql self._data_uri_relations = defaultdict(list) - self._initialized = {'init_uri_eid': set(), - 'uri_eid_inserted': set(), - 'uri_rtypes': set(), - 'entities': set(), - 'rtypes': set(), - } + + # etypes for which we have a uri_eid_%(etype)s table + self._init_uri_eid = set() + # etypes for which we have a uri_eid_%(e)s_idx index + self._uri_eid_inserted = set() + # set of rtypes for which we have a %(rtype)s_relation_iid_tmp table + self._uri_rtypes = set() + # set of etypes whose tables are created + self._entities = set() + # set of rtypes for which we have a %(rtype)s_relation_tmp table + self._rtypes = set() + self.sql = self._cnx.system_sql self.slave_mode = slave_mode self.size_constraints = get_size_constraints(cnx.vreg.schema) @@ -141,15 +147,15 @@ ### INIT FUNCTIONS ######################################################## def init_rtype_table(self, etype_from, rtype, etype_to): - """ Build temporary table a for standard rtype """ + """ Build temporary table for standard rtype """ # Create an uri_eid table for each etype for a better - # control of which etype is concerns for a particular + # control of which etype is concerned by a particular # possibly multivalued relation. for etype in (etype_from, etype_to): - if etype and etype not in self._initialized['init_uri_eid']: + if etype and etype not in self._init_uri_eid: self._init_uri_eid_table(etype) - if rtype not in self._initialized['uri_rtypes']: - # Create the temporary tables + if rtype not in self._uri_rtypes: + # Create the temporary table if not self._cnx.repo.schema.rschema(rtype).inlined: try: sql = 'CREATE TABLE %(r)s_relation_iid_tmp (uri_from character ' \ @@ -160,8 +166,8 @@ pass else: self.logger.warning("inlined relation %s: cannot insert it", rtype) - #Add it to the initialized set - self._initialized['uri_rtypes'].add(rtype) + # Add it to the initialized set + self._uri_rtypes.add(rtype) def _init_uri_eid_table(self, etype): """ Build a temporary table for id/eid convertion @@ -173,7 +179,7 @@ # XXX Already exist (probably due to multiple import) pass # Add it to the initialized set - self._initialized['init_uri_eid'].add(etype) + self._init_uri_eid.add(etype) def _init_massive_metatables(self): # Check if our tables are not already created (i.e. a restart) @@ -224,7 +230,7 @@ # Add indexes self.sql('CREATE INDEX uri_eid_%(e)s_idx ON uri_eid_%(e)s' '(uri)' % {'e': etype.lower()}) # Set the etype as converted - self._initialized['uri_eid_inserted'].add(etype) + self._uri_eid_inserted.add(etype) self.commit() def convert_relations(self, etype_from, rtype, etype_to, @@ -234,9 +240,9 @@ # Always flush relations to be sure self.logger.info('Convert relations %s %s %s', etype_from, rtype, etype_to) self.flush_relations() - if uri_label_from and etype_from not in self._initialized['uri_eid_inserted']: + if uri_label_from and etype_from not in self._uri_eid_inserted: self.fill_uri_eid_table(etype_from, uri_label_from) - if uri_label_to and etype_to not in self._initialized['uri_eid_inserted']: + if uri_label_to and etype_to not in self._uri_eid_inserted: self.fill_uri_eid_table(etype_to, uri_label_to) if self._cnx.repo.schema.rschema(rtype).inlined: self.logger.warning("Can't insert inlined relation %s", rtype) @@ -326,7 +332,7 @@ def init_relation_table(self, rtype): """ Get and remove all indexes for performance sake """ # Create temporary table - if not self.slave_mode and rtype not in self._initialized['rtypes']: + if not self.slave_mode and rtype not in self._rtypes: sql = "CREATE TABLE %s_relation_tmp (eid_from integer, eid_to integer)" % rtype.lower() self.sql(sql) # Drop indexes and constraints @@ -337,7 +343,7 @@ sql = 'INSERT INTO cwmassive_initialized VALUES (%(e)s, %(t)s)' self.sql(sql, {'e': rtype, 't': 'rtype'}) # Mark rtype as "initialized" for faster check - self._initialized['rtypes'].add(rtype) + self._rtypes.add(rtype) def init_create_initialized_table(self): """ Create the cwmassive initialized table @@ -350,7 +356,7 @@ def init_etype_table(self, etype): """ Add eid sequence to a particular etype table and remove all indexes for performance sake """ - if etype not in self._initialized['entities']: + if etype not in self._entities: # Only for non-initialized etype and not slave mode store if not self.slave_mode: if self.eids_seq_range is None: @@ -367,7 +373,7 @@ sql = 'INSERT INTO cwmassive_initialized VALUES (%(e)s, %(t)s)' self.sql(sql, {'e': etype, 't': 'etype'}) # Mark etype as "initialized" for faster check - self._initialized['entities'].add(etype) + self._entities.add(etype) ### ENTITIES CREATION ##################################################### @@ -447,10 +453,10 @@ raise RuntimeError('Store cleanup is not allowed in slave mode') self.logger.info("Start cleaning") # Cleanup relations tables - for etype in self._initialized['init_uri_eid']: + for etype in self._init_uri_eid: self.sql('DROP TABLE uri_eid_%s' % etype.lower()) # Remove relations tables - for rtype in self._initialized['uri_rtypes']: + for rtype in self._uri_rtypes: if not self._cnx.repo.schema.rschema(rtype).inlined: self.sql('DROP TABLE %(r)s_relation_iid_tmp' % {'r': rtype}) else: