[dataimport] drop extra indirection through MassiveObjectStore._initialized dict
authorJulien Cristau <julien.cristau@logilab.fr>
Mon, 09 Nov 2015 16:21:29 +0100
changeset 10879 3193d9ede8dd
parent 10878 fda5e42037a9
child 10880 5fb592895e0f
[dataimport] drop extra indirection through MassiveObjectStore._initialized dict We already have self.__dict__, no need to add another one for a static set of keys.
dataimport/massive_store.py
--- a/dataimport/massive_store.py	Mon Nov 09 16:04:13 2015 +0100
+++ b/dataimport/massive_store.py	Mon Nov 09 16:21:29 2015 +0100
@@ -101,12 +101,18 @@
         self._cnx = cnx
         self.sql = cnx.system_sql
         self._data_uri_relations = defaultdict(list)
-        self._initialized = {'init_uri_eid': set(),
-                             'uri_eid_inserted': set(),
-                             'uri_rtypes': set(),
-                             'entities': set(),
-                             'rtypes': set(),
-                            }
+
+        # etypes for which we have a uri_eid_%(etype)s table
+        self._init_uri_eid = set()
+        # etypes for which we have a uri_eid_%(e)s_idx index
+        self._uri_eid_inserted = set()
+        # set of rtypes for which we have a %(rtype)s_relation_iid_tmp table
+        self._uri_rtypes = set()
+        # set of etypes whose tables are created
+        self._entities = set()
+        # set of rtypes for which we have a %(rtype)s_relation_tmp table
+        self._rtypes = set()
+
         self.sql = self._cnx.system_sql
         self.slave_mode = slave_mode
         self.size_constraints = get_size_constraints(cnx.vreg.schema)
@@ -141,15 +147,15 @@
     ### INIT FUNCTIONS ########################################################
 
     def init_rtype_table(self, etype_from, rtype, etype_to):
-        """ Build temporary table a for standard rtype """
+        """ Build temporary table for standard rtype """
         # Create an uri_eid table for each etype for a better
-        # control of which etype is concerns for a particular
+        # control of which etype is concerned by a particular
         # possibly multivalued relation.
         for etype in (etype_from, etype_to):
-            if etype and etype not in self._initialized['init_uri_eid']:
+            if etype and etype not in self._init_uri_eid:
                 self._init_uri_eid_table(etype)
-        if rtype not in self._initialized['uri_rtypes']:
-            # Create the temporary tables
+        if rtype not in self._uri_rtypes:
+            # Create the temporary table
             if not self._cnx.repo.schema.rschema(rtype).inlined:
                 try:
                     sql = 'CREATE TABLE %(r)s_relation_iid_tmp (uri_from character ' \
@@ -160,8 +166,8 @@
                     pass
             else:
                 self.logger.warning("inlined relation %s: cannot insert it", rtype)
-            #Add it to the initialized set
-            self._initialized['uri_rtypes'].add(rtype)
+            # Add it to the initialized set
+            self._uri_rtypes.add(rtype)
 
     def _init_uri_eid_table(self, etype):
         """ Build a temporary table for id/eid convertion
@@ -173,7 +179,7 @@
             # XXX Already exist (probably due to multiple import)
             pass
         # Add it to the initialized set
-        self._initialized['init_uri_eid'].add(etype)
+        self._init_uri_eid.add(etype)
 
     def _init_massive_metatables(self):
         # Check if our tables are not already created (i.e. a restart)
@@ -224,7 +230,7 @@
         # Add indexes
         self.sql('CREATE INDEX uri_eid_%(e)s_idx ON uri_eid_%(e)s' '(uri)' % {'e': etype.lower()})
         # Set the etype as converted
-        self._initialized['uri_eid_inserted'].add(etype)
+        self._uri_eid_inserted.add(etype)
         self.commit()
 
     def convert_relations(self, etype_from, rtype, etype_to,
@@ -234,9 +240,9 @@
         # Always flush relations to be sure
         self.logger.info('Convert relations %s %s %s', etype_from, rtype, etype_to)
         self.flush_relations()
-        if uri_label_from and etype_from not in self._initialized['uri_eid_inserted']:
+        if uri_label_from and etype_from not in self._uri_eid_inserted:
             self.fill_uri_eid_table(etype_from, uri_label_from)
-        if uri_label_to and etype_to not in self._initialized['uri_eid_inserted']:
+        if uri_label_to and etype_to not in self._uri_eid_inserted:
             self.fill_uri_eid_table(etype_to, uri_label_to)
         if self._cnx.repo.schema.rschema(rtype).inlined:
             self.logger.warning("Can't insert inlined relation %s", rtype)
@@ -326,7 +332,7 @@
     def init_relation_table(self, rtype):
         """ Get and remove all indexes for performance sake """
         # Create temporary table
-        if not self.slave_mode and rtype not in self._initialized['rtypes']:
+        if not self.slave_mode and rtype not in self._rtypes:
             sql = "CREATE TABLE %s_relation_tmp (eid_from integer, eid_to integer)" % rtype.lower()
             self.sql(sql)
             # Drop indexes and constraints
@@ -337,7 +343,7 @@
             sql = 'INSERT INTO cwmassive_initialized VALUES (%(e)s, %(t)s)'
             self.sql(sql, {'e': rtype, 't': 'rtype'})
             # Mark rtype as "initialized" for faster check
-            self._initialized['rtypes'].add(rtype)
+            self._rtypes.add(rtype)
 
     def init_create_initialized_table(self):
         """ Create the cwmassive initialized table
@@ -350,7 +356,7 @@
     def init_etype_table(self, etype):
         """ Add eid sequence to a particular etype table and
         remove all indexes for performance sake """
-        if etype not in self._initialized['entities']:
+        if etype not in self._entities:
             # Only for non-initialized etype and not slave mode store
             if not self.slave_mode:
                 if self.eids_seq_range is None:
@@ -367,7 +373,7 @@
                 sql = 'INSERT INTO cwmassive_initialized VALUES (%(e)s, %(t)s)'
                 self.sql(sql, {'e': etype, 't': 'etype'})
             # Mark etype as "initialized" for faster check
-            self._initialized['entities'].add(etype)
+            self._entities.add(etype)
 
 
     ### ENTITIES CREATION #####################################################
@@ -447,10 +453,10 @@
             raise RuntimeError('Store cleanup is not allowed in slave mode')
         self.logger.info("Start cleaning")
         # Cleanup relations tables
-        for etype in self._initialized['init_uri_eid']:
+        for etype in self._init_uri_eid:
             self.sql('DROP TABLE uri_eid_%s' % etype.lower())
         # Remove relations tables
-        for rtype in self._initialized['uri_rtypes']:
+        for rtype in self._uri_rtypes:
             if not self._cnx.repo.schema.rschema(rtype).inlined:
                 self.sql('DROP TABLE %(r)s_relation_iid_tmp' % {'r': rtype})
             else: