[massive store] Delay metadata attributes and default values handling to flush_entities
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Wed, 05 Oct 2016 09:27:25 +0200
changeset 11785 0cea67f41d0c
parent 11784 c1aa50a88de3
child 11786 f5d26d3648d4
[massive store] Delay metadata attributes and default values handling to flush_entities. This can be done once and for all there, instead of for each entity in prepare_insert_entity.
cubicweb/dataimport/massive_store.py
--- a/cubicweb/dataimport/massive_store.py	Wed Oct 05 09:38:05 2016 +0200
+++ b/cubicweb/dataimport/massive_store.py	Wed Oct 05 09:27:25 2016 +0200
@@ -185,25 +185,17 @@
 
     # store api ################################################################
 
-    def prepare_insert_entity(self, etype, **kwargs):
+    def prepare_insert_entity(self, etype, **data):
         """Given an entity type, attributes and inlined relations, returns the inserted entity's
         eid.
         """
         if not self.slave_mode and etype not in self._initialized:
             self._initialized.add(etype)
             self.master_init_etype(etype)
-        attrs = self.metagen.base_etype_attrs(etype)
-        data = copy(attrs)  # base_etype_attrs is @cached, a copy is necessary
-        data.update(kwargs)
         if 'eid' not in data:
             # If eid is not given and the eids sequence is set, use the value from the sequence
             eid = self.get_next_eid()
             data['eid'] = eid
-        # XXX default values could be set once for all in base entity
-        default_values = self.default_values[etype]
-        missing_keys = set(default_values) - set(data)
-        data.update((key, default_values[key]) for key in missing_keys)
-        self.metagen.init_entity_attrs(etype, data['eid'], data)
         self._data_entities[etype].append(data)
         return data['eid']
 
@@ -303,6 +295,7 @@
 
     def flush_entities(self):
         """Flush the entities data."""
+        metagen = self.metagen
         for etype, data in self._data_entities.items():
             if not data:
                 # There is no data for these etype for this flush round.
@@ -314,11 +307,14 @@
             columns = set()
             for d in data:
                 columns.update(d)
+            _base_data = dict.fromkeys(columns)
+            _base_data.update(self.default_values[etype])
+            _base_data.update(metagen.base_etype_attrs(etype))
             _data = []
-            _base_data = dict.fromkeys(columns)
             for d in data:
                 _d = _base_data.copy()
                 _d.update(d)
+                metagen.init_entity_attrs(etype, _d['eid'], _d)
                 _data.append(_d)
             buf = pgstore._create_copyfrom_buffer(_data, columns)
             if not buf: