[dataimport] testing whether a value is in a set and inserting it into that set should live together
author: Sylvain Thénault <sylvain.thenault@logilab.fr>
Thu, 28 Jan 2016 15:13:39 +0100
changeset 11319 fe90d07f3afa
parent 11318 09731bd52887
child 11320 78da04c853dc
[dataimport] testing whether a value is in a set and inserting it into that set should live together. Also inline some methods along the way, in the hope of making the public API clearer at some point, and only put meaningful values in those sets so we don't have to endlessly test for consistency.
cubicweb/dataimport/massive_store.py
--- a/cubicweb/dataimport/massive_store.py	Thu Jan 28 15:08:16 2016 +0100
+++ b/cubicweb/dataimport/massive_store.py	Thu Jan 28 15:13:39 2016 +0100
@@ -176,27 +176,19 @@
         # possibly multivalued relation.
         for etype in (etype_from, etype_to):
             if etype and etype not in self._init_uri_eid:
-                self._init_uri_eid_table(etype)
+                self._init_uri_eid.add(etype)
+                self.sql('CREATE TABLE IF NOT EXISTS uri_eid_%(e)s'
+                         '(uri character varying(%(size)s), eid integer)'
+                         % {'e': etype.lower(), 'size': self.iid_maxsize})
         if rtype not in self._uri_rtypes:
             # Create the temporary table
             if not self.schema.rschema(rtype).inlined:
                 self.sql('CREATE TABLE IF NOT EXISTS %(r)s_relation_iid_tmp'
                          '(uri_from character varying(%(s)s), uri_to character varying(%(s)s))'
                          % {'r': rtype, 's': self.iid_maxsize})
+                self._uri_rtypes.add(rtype)
             else:
                 self.logger.warning("inlined relation %s: cannot insert it", rtype)
-            # Add it to the initialized set
-            self._uri_rtypes.add(rtype)
-
-    def _init_uri_eid_table(self, etype):
-        """ Build a temporary table for id/eid convertion
-        """
-        self.sql('CREATE TABLE IF NOT EXISTS uri_eid_%(e)s'
-                 '(uri character varying(%(size)s), eid integer)'
-                 % {'e': etype.lower(), 'size': self.iid_maxsize})
-        # Add it to the initialized set
-        self._init_uri_eid.add(etype)
-
 
     # RELATE FUNCTION #######################################################
 
@@ -235,13 +227,13 @@
     def fill_uri_eid_table(self, etype, uri_label):
         """ Fill the uri_eid table
         """
-        self.logger.info('Fill uri_eid for etype %s', etype)
-        self.sql('INSERT INTO uri_eid_%(e)s SELECT cw_%(l)s, cw_eid FROM cw_%(e)s'
-                 % {'l': uri_label, 'e': etype.lower()})
-        # Add indexes
-        self.sql('CREATE INDEX uri_eid_%(e)s_idx ON uri_eid_%(e)s' '(uri)' % {'e': etype.lower()})
-        # Set the etype as converted
-        self._uri_eid_inserted.add(etype)
+        if etype not in self._uri_eid_inserted:
+            self._uri_eid_inserted.add(etype)
+            self.logger.info('Fill uri_eid for etype %s', etype)
+            self.sql('INSERT INTO uri_eid_%(e)s SELECT cw_%(l)s, cw_eid FROM cw_%(e)s'
+                     % {'l': uri_label, 'e': etype.lower()})
+            self.sql('CREATE INDEX uri_eid_%(e)s_idx ON uri_eid_%(e)s(uri)'
+                     % {'e': etype.lower()})
 
     def convert_relations(self, etype_from, rtype, etype_to,
                           uri_label_from='cwuri', uri_label_to='cwuri'):
@@ -250,9 +242,9 @@
         # Always flush relations to be sure
         self.logger.info('Convert relations %s %s %s', etype_from, rtype, etype_to)
         self.flush_relations()
-        if uri_label_from and etype_from not in self._uri_eid_inserted:
+        if uri_label_from:
             self.fill_uri_eid_table(etype_from, uri_label_from)
-        if uri_label_to and etype_to not in self._uri_eid_inserted:
+        if uri_label_to:
             self.fill_uri_eid_table(etype_to, uri_label_to)
         if self.schema.rschema(rtype).inlined:
             self.logger.warning("Can't insert inlined relation %s", rtype)
@@ -430,10 +422,7 @@
             self.sql('DROP TABLE uri_eid_%s' % etype.lower())
         # Remove relations tables
         for rtype in self._uri_rtypes:
-            if not self.schema.rschema(rtype).inlined:
-                self.sql('DROP TABLE %(r)s_relation_iid_tmp' % {'r': rtype})
-            else:
-                self.logger.warning("inlined relation %s: no cleanup to be done for it" % rtype)
+            self.sql('DROP TABLE %(r)s_relation_iid_tmp' % {'r': rtype})
         # Create meta constraints (entities, is_instance_of, ...)
         self._create_metatables_constraints()
         # Get all the initialized etypes/rtypes
@@ -443,7 +432,7 @@
                 self.logger.info('Cleanup for %s' % retype)
                 if _type == 'etype':
                     # Cleanup entities tables - Recreate indexes
-                    self._cleanup_entities(retype)
+                    self.reapply_constraint_index('cw_%s' % etype.lower())
                 elif _type == 'rtype':
                     # Cleanup relations tables
                     self._cleanup_relations(retype)
@@ -543,12 +532,6 @@
                 self.insert_massive_meta_data(etype)
                 self.sql('INSERT INTO cwmassive_metadata VALUES (%(e)s)', {'e': etype})
 
-    def _cleanup_entities(self, etype):
-        """ Cleanup etype table """
-        # Create indexes and constraints
-        tablename = SQL_PREFIX + etype.lower()
-        self.reapply_constraint_index(tablename)
-
     def _cleanup_relations(self, rtype):
         """ Cleanup rtype table """
         # Push into relation table while removing duplicate