[massive store] isolate SQL statements that one may want to customize
authorAdrien Di Mascio <Adrien.DiMascio@logilab.fr>
Thu, 13 Oct 2016 15:12:33 +0200
changeset 11790 04607da552ac
parent 11789 71df2811b422
child 11791 20555214576b
[massive store] isolate SQL statements that one may want to customize i.e. provide public methods for meta data insertions
cubicweb/dataimport/massive_store.py
--- a/cubicweb/dataimport/massive_store.py	Thu Oct 06 12:12:04 2016 +0200
+++ b/cubicweb/dataimport/massive_store.py	Thu Oct 13 15:12:33 2016 +0200
@@ -234,12 +234,7 @@
                 self._dbh.drop_indexes(tablename)
                 for uuid in uuids:
                     tmp_tablename = '%s_%s' % (tablename, uuid)
-                    # XXX no index on the original relation table, EXISTS subquery may be sloooow
-                    self.sql('INSERT INTO %(table)s(eid_from, eid_to) SELECT DISTINCT '
-                             'T.eid_from, T.eid_to FROM %(tmp_table)s AS T '
-                             'WHERE NOT EXISTS (SELECT 1 FROM %(table)s AS TT WHERE '
-                             'TT.eid_from=T.eid_from AND TT.eid_to=T.eid_to);'
-                             % {'table': tablename, 'tmp_table': tmp_tablename})
+                    self.fill_relation_table(tablename, tmp_tablename)
                     self._tmp_data_cleanup(tmp_tablename, rtype, uuid)
         # restore all deleted indexes and constraints
         self._dbh.restore_indexes_and_constraints()
@@ -252,20 +247,31 @@
         """
         # insert standard metadata relations
         for rtype, eid in self.metagen.base_etype_rels(etype).items():
-            self._insert_meta_relation(tmp_tablename, rtype, eid)
+            self.fill_meta_relation_table(tmp_tablename, rtype, eid)
         # insert cw_source, is and is_instance_of relations (normally handled by the system source)
-        self._insert_meta_relation(tmp_tablename, 'cw_source', self.metagen.source.eid)
+        self.fill_meta_relation_table(tmp_tablename, 'cw_source', self.metagen.source.eid)
         eschema = self.schema[etype]
-        self._insert_meta_relation(tmp_tablename, 'is', eschema.eid)
+        self.fill_meta_relation_table(tmp_tablename, 'is', eschema.eid)
         for parent_eschema in chain(eschema.ancestors(), [eschema]):
-            self._insert_meta_relation(tmp_tablename, 'is_instance_of', parent_eschema.eid)
+            self.fill_meta_relation_table(tmp_tablename, 'is_instance_of', parent_eschema.eid)
+        self.fill_entities_table(etype, tmp_tablename)
+
+    def fill_entities_table(self, etype, tmp_tablename):
         # finally insert records into the entities table
         self.sql("INSERT INTO entities(eid, type) "
                  "SELECT cw_eid, '%s' FROM %s "
                  "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
                  % (etype, tmp_tablename))
 
-    def _insert_meta_relation(self, tmp_tablename, rtype, eid_to):
+    def fill_relation_table(self, tablename, tmp_tablename):
+        # XXX no index on the original relation table, EXISTS subquery may be sloooow
+        self.sql('INSERT INTO %(table)s(eid_from, eid_to) SELECT DISTINCT '
+                 'T.eid_from, T.eid_to FROM %(tmp_table)s AS T '
+                 'WHERE NOT EXISTS (SELECT 1 FROM %(table)s AS TT WHERE '
+                 'TT.eid_from=T.eid_from AND TT.eid_to=T.eid_to);'
+                 % {'table': tablename, 'tmp_table': tmp_tablename})
+
+    def fill_meta_relation_table(self, tmp_tablename, rtype, eid_to):
         self.sql("INSERT INTO %s_relation(eid_from, eid_to) SELECT cw_eid, %s FROM %s "
                  "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"
                  % (rtype, eid_to, tmp_tablename))