[dataimport] introduce usage of MetadataGenerator into the massive store
author: Sylvain Thénault <sylvain.thenault@logilab.fr>
Mon, 01 Feb 2016 17:29:59 +0100
changeset 11326 06eeac9389a3
parent 11325 a29443fbd1f2
child 11327 901243e41152
[dataimport] introduce usage of MetadataGenerator into the massive store. This allows more flexibility and code sharing with the nohook store. Closes #10338621
cubicweb/dataimport/massive_store.py
--- a/cubicweb/dataimport/massive_store.py	Thu Jan 28 18:14:27 2016 +0100
+++ b/cubicweb/dataimport/massive_store.py	Mon Feb 01 17:29:59 2016 +0100
@@ -18,22 +18,18 @@
 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
 
 import logging
-from datetime import datetime
+from copy import copy
 from collections import defaultdict
 from io import StringIO
 from itertools import chain
 
 from six.moves import range
 
-import pytz
-
 from yams.constraints import SizeConstraint
 
 from cubicweb.schema import PURE_VIRTUAL_RTYPES
 from cubicweb.server.schema2sql import rschema_has_table
-from cubicweb.server.sqlutils import SQL_PREFIX
 from cubicweb.dataimport import stores, pgstore
-from cubicweb.utils import make_uid
 
 
 class MassiveObjectStore(stores.RQLObjectStore):
@@ -92,8 +88,8 @@
     def __init__(self, cnx,
                  on_commit_callback=None, on_rollback_callback=None,
                  slave_mode=False,
-                 source=None,
-                 eids_seq_range=10000):
+                 eids_seq_range=10000,
+                 metagen=None):
         """ Create a MassiveObject store, with the following attributes:
 
         - cnx: CubicWeb cnx
@@ -104,6 +100,9 @@
         self.on_rollback_callback = on_rollback_callback
         self.slave_mode = slave_mode
         self.eids_seq_range = eids_seq_range
+        if metagen is None:
+            metagen = stores.MetadataGenerator(cnx)
+        self.metagen = metagen
 
         self.logger = logging.getLogger('dataimport.massive_store')
         self.sql = cnx.system_sql
@@ -127,17 +126,20 @@
         # set of rtypes for which we have a %(rtype)s_relation_iid_tmp table
         self._uri_rtypes = set()
 
-        self._now = datetime.now(pytz.utc)
-        self._default_cwuri = make_uid('_auto_generated')
-
         if not self.slave_mode:
             # drop constraint and metadata table, they will be recreated when self.finish() is
             # called
             self._drop_all_constraints()
             self._drop_metatables_constraints()
-        if source is None:
-            source = cnx.repo.system_source
-        self.source = source
+
+    def _get_eid_gen(self):
+        """ Function getting the next eid. This is done by preselecting
+        a given number of eids from the 'entities_id_seq', and then
+        storing them"""
+        while True:
+            last_eid = self._cnx.repo.system_source.create_eid(self._cnx, self.eids_seq_range)
+            for eid in range(last_eid - self.eids_seq_range + 1, last_eid + 1):
+                yield eid
 
     # URI related things #######################################################
 
@@ -267,23 +269,6 @@
             'entities_id_seq', initial_value=start_eid))
         self._cnx.commit()
 
-    # ENTITIES CREATION #####################################################
-
-    def _get_eid_gen(self):
-        """ Function getting the next eid. This is done by preselecting
-        a given number of eids from the 'entities_id_seq', and then
-        storing them"""
-        while True:
-            last_eid = self._cnx.repo.system_source.create_eid(self._cnx, self.eids_seq_range)
-            for eid in range(last_eid - self.eids_seq_range + 1, last_eid + 1):
-                yield eid
-
-    def _apply_default_values(self, etype, kwargs):
-        """Apply the default values for a given etype, attribute and value."""
-        default_values = self.default_values[etype]
-        missing_keys = set(default_values) - set(kwargs)
-        kwargs.update((key, default_values[key]) for key in missing_keys)
-
     # store api ################################################################
 
     def prepare_insert_entity(self, etype, **kwargs):
@@ -296,21 +281,20 @@
             self.sql('CREATE TABLE IF NOT EXISTS cwmassive_initialized'
                      '(retype text, type varchar(128))')
             self.sql("INSERT INTO cwmassive_initialized VALUES (%(e)s, 'etype')", {'e': etype})
-        # Add meta data if not given
-        if 'modification_date' not in kwargs:
-            kwargs['modification_date'] = self._now
-        if 'creation_date' not in kwargs:
-            kwargs['creation_date'] = self._now
-        if 'cwuri' not in kwargs:
-            kwargs['cwuri'] = self._default_cwuri + str(self._count_cwuri)
-            self._count_cwuri += 1
-        if 'eid' not in kwargs:
-            # If eid is not given and the eids sequence is set,
-            # use the value from the sequence
-            kwargs['eid'] = self.get_next_eid()
-        self._apply_default_values(etype, kwargs)
-        self._data_entities[etype].append(kwargs)
-        return kwargs.get('eid')
+        attrs = self.metagen.base_etype_attrs(etype)
+        data = copy(attrs)  # base_etype_attrs is @cached, a copy is necessary
+        data.update(kwargs)
+        if 'eid' not in data:
+            # If eid is not given and the eids sequence is set, use the value from the sequence
+            eid = self.get_next_eid()
+            data['eid'] = eid
+        # XXX default values could be set once for all in base entity
+        default_values = self.default_values[etype]
+        missing_keys = set(default_values) - set(data)
+        data.update((key, default_values[key]) for key in missing_keys)
+        self.metagen.init_entity_attrs(etype, data['eid'], data)
+        self._data_entities[etype].append(data)
+        return data['eid']
 
     def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
         """Insert into the database a  relation ``rtype`` between entities with eids ``eid_from``
@@ -466,13 +450,16 @@
     def insert_massive_metadata(self, etype):
         """ Massive insertion of meta data for a given etype, based on SQL statements.
         """
-        self._insert_meta_relation(etype, self._cnx.user.eid, 'created_by_relation')
-        self._insert_meta_relation(etype, self._cnx.user.eid, 'owned_by_relation')
-        self._insert_meta_relation(etype, self.source.eid, 'cw_source_relation')
-        eschema = self.schema[etype].eid
+        # insert standard metadata relations
+        for rtype, eid in self.metagen.base_etype_rels(etype).items():
+            self._insert_meta_relation(etype, eid, '%s_relation' % rtype)
+        # insert cw_source, is and is_instance_of relations (normally handled by the system source)
+        self._insert_meta_relation(etype, self.metagen.source.eid, 'cw_source_relation')
+        eschema = self.schema[etype]
         self._insert_meta_relation(etype, eschema.eid, 'is_relation')
-        for parent_eschema in eschema.ancestors() + [eschema]:
+        for parent_eschema in chain(eschema.ancestors(), [eschema]):
             self._insert_meta_relation(etype, parent_eschema.eid, 'is_instance_of_relation')
+        # finally insert records into the entities table
         self.sql("INSERT INTO entities (eid, type, asource, extid) "
                  "SELECT cw_eid, '%s', 'system', NULL FROM cw_%s "
                  "WHERE NOT EXISTS (SELECT 1 FROM entities WHERE eid=cw_eid)"