[dataimport] Move stores to new API.
author Yann Voté <yann.vote@logilab.fr>
Wed, 24 Jun 2015 23:23:57 +0200
changeset 10457 1f5026e7d848
parent 10456 e7ee508a8b2f
child 10458 9071db303c0c
[dataimport] Move stores to new API. Here is the final store API: * ``prepare_insert_entity(<entity type>, **kwargs) -> eid``: given an entity type, attributes and inlined relations, return the eid of the entity to be inserted, *with no guarantee that anything has been inserted in database*, * ``prepare_update_entity(<entity type>, eid, **kwargs) -> None``: given an entity type and eid, promise to update the given attributes and inlined relations, *with no guarantee that anything has been updated in database*, * ``prepare_insert_relation(eid_from, rtype, eid_to) -> None``: indicate that a relation ``rtype`` should be added between entities with eids ``eid_from`` and ``eid_to``. Similarly to ``prepare_insert_entity()``, *there is no guarantee that the relation will be inserted in database*, * ``flush() -> None``: flush any temporary data to database. May be called several times during an import, * ``finish() -> None``: additional stuff to do after import is terminated. **Warning:** ``prepare_update_entity()`` still needs a hook-skipping implementation for NoHookRQLObjectStore (for now it inherits the one from RQLObjectStore). Related to #5040344
dataimport/__init__.py
dataimport/test/unittest_dataimport.py
doc/book/en/devrepo/dataimport.rst
--- a/dataimport/__init__.py	Tue Jun 23 13:08:48 2015 +0200
+++ b/dataimport/__init__.py	Wed Jun 24 23:23:57 2015 +0200
@@ -37,10 +37,10 @@
           entity = mk_entity(row, USERS)
           entity['upassword'] = 'motdepasse'
           ctl.check('login', entity['login'], None)
-          entity = ctl.store.create_entity('CWUser', **entity)
-          email = ctl.store.create_entity('EmailAddress', address=row['email'])
-          ctl.store.relate(entity.eid, 'use_email', email.eid)
-          ctl.store.rql('SET U in_group G WHERE G name "users", U eid %(x)s', {'x':entity['eid']})
+          entity = ctl.store.prepare_insert_entity('CWUser', **entity)
+          email = ctl.store.prepare_insert_entity('EmailAddress', address=row['email'])
+          ctl.store.prepare_insert_relation(entity, 'use_email', email)
+          ctl.store.rql('SET U in_group G WHERE G name "users", U eid %(x)s', {'x': entity})
 
   CHK = [('login', check_doubles, 'Utilisateurs Login',
           'Deux utilisateurs ne devraient pas avoir le même login.'),
@@ -543,9 +543,9 @@
     But it will not enforce the constraints of the schema and hence will miss some problems
 
     >>> store = ObjectStore()
-    >>> user = store.create_entity('CWUser', login=u'johndoe')
-    >>> group = store.create_entity('CWUser', name=u'unknown')
-    >>> store.relate(user.eid, 'in_group', group.eid)
+    >>> user = store.prepare_insert_entity('CWUser', login=u'johndoe')
+    >>> group = store.prepare_insert_entity('CWGroup', name=u'unknown')
+    >>> store.prepare_insert_relation(user, 'in_group', group)
     """
     def __init__(self):
         self.items = []
@@ -554,26 +554,43 @@
         self.relations = set()
         self.indexes = {}
 
-    def create_entity(self, etype, **data):
+    def prepare_insert_entity(self, etype, **data):
+        """Given an entity type, attributes and inlined relations, return an eid for the entity that
+        would be inserted with a real store.
+        """
         data = attrdict(data)
         data['eid'] = eid = len(self.items)
         self.items.append(data)
         self.eids[eid] = data
         self.types.setdefault(etype, []).append(eid)
-        return data
+        return eid
 
-    def relate(self, eid_from, rtype, eid_to, **kwargs):
-        """Add new relation"""
+    def prepare_update_entity(self, etype, eid, **kwargs):
+        """Update the fake entity ``eid`` of type ``etype`` with the given attributes and inlined
+        relations; fail with AssertionError when ``eid`` was not inserted with type ``etype``.
+        """
+        assert eid in self.types.get(etype, ()), 'Trying to update with wrong type {}'.format(etype)
+        data = self.eids[eid]
+        data.update(kwargs)
+
+    def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
+        """Store into the `relations` attribute that a relation ``rtype`` exists between entities
+        with eids ``eid_from`` and ``eid_to``.
+        """
         relation = eid_from, rtype, eid_to
         self.relations.add(relation)
         return relation
 
+    def flush(self):
+        """Nothing to flush for this store."""
+        pass
+
     def commit(self):
-        """this commit method does nothing by default"""
+        """Nothing to commit for this store."""
         return
 
-    def flush(self):
-        """The method is provided so that all stores share a common API"""
+    def finish(self):
+        """Nothing to do once import is terminated for this store."""
         pass
 
     @property
@@ -586,6 +603,16 @@
     def nb_inserted_relations(self):
         return len(self.relations)
 
+    @deprecated('[3.21] use prepare_insert_entity instead')
+    def create_entity(self, etype, **data):
+        eid = self.prepare_insert_entity(etype, **data)
+        return self.eids[eid]  # the stored attrdict, 'eid' key included, as before the rename
+
+    @deprecated('[3.21] use prepare_insert_relation instead')
+    def relate(self, eid_from, rtype, eid_to, **kwargs):
+        return self.prepare_insert_relation(eid_from, rtype, eid_to, **kwargs)  # triple, as before
+
+
 class RQLObjectStore(ObjectStore):
     """ObjectStore that works with an actual RQL repository (production mode)"""
 
@@ -599,29 +626,45 @@
         self._commit = commit or cnx.commit
 
     def commit(self):
+        """Commit the database transaction."""
         return self._commit()
 
     def rql(self, *args):
         return self._cnx.execute(*args)
 
+    def prepare_insert_entity(self, *args, **kwargs):
+        """Given an entity type, attributes and inlined relations, returns the inserted entity's
+        eid.
+        """
+        entity = self._cnx.create_entity(*args, **kwargs)
+        self.eids[entity.eid] = entity
+        self.types.setdefault(args[0], []).append(entity.eid)
+        return entity.eid
+
+    def prepare_update_entity(self, etype, eid, **kwargs):
+        """Given an entity type and eid, updates the corresponding entity with specified attributes
+        and inlined relations.
+        """
+        entity = self._cnx.entity_from_eid(eid)
+        assert entity.cw_etype == etype, 'Trying to update with wrong type {}'.format(etype)
+        # XXX some inlined relations may already exist
+        entity.cw_set(**kwargs)
+
+    def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
+        """Insert into the database a  relation ``rtype`` between entities with eids ``eid_from``
+        and ``eid_to``.
+        """
+        eid_from, rtype, eid_to = super(RQLObjectStore, self).prepare_insert_relation(
+            eid_from, rtype, eid_to, **kwargs)
+        self.rql('SET X %s Y WHERE X eid %%(x)s, Y eid %%(y)s' % rtype,
+                 {'x': int(eid_from), 'y': int(eid_to)})
+
     @property
     def session(self):
         warnings.warn('[3.19] deprecated property.', DeprecationWarning,
                       stacklevel=2)
         return self._cnx.repo._get_session(self._cnx.sessionid)
 
-    def create_entity(self, *args, **kwargs):
-        entity = self._cnx.create_entity(*args, **kwargs)
-        self.eids[entity.eid] = entity
-        self.types.setdefault(args[0], []).append(entity.eid)
-        return entity
-
-    def relate(self, eid_from, rtype, eid_to, **kwargs):
-        eid_from, rtype, eid_to = super(RQLObjectStore, self).relate(
-            eid_from, rtype, eid_to, **kwargs)
-        self.rql('SET X %s Y WHERE X eid %%(x)s, Y eid %%(y)s' % rtype,
-                 {'x': int(eid_from), 'y': int(eid_to)})
-
     @deprecated("[3.19] use cnx.find(*args, **kwargs).entities() instead")
     def find_entities(self, *args, **kwargs):
         return self._cnx.find(*args, **kwargs).entities()
@@ -630,6 +673,15 @@
     def find_one_entity(self, *args, **kwargs):
         return self._cnx.find(*args, **kwargs).one()
 
+    @deprecated('[3.21] use prepare_insert_entity instead')
+    def create_entity(self, *args, **kwargs):
+        eid = self.prepare_insert_entity(*args, **kwargs)
+        return self._cnx.entity_from_eid(eid)
+
+    @deprecated('[3.21] use prepare_insert_relation instead')
+    def relate(self, eid_from, rtype, eid_to, **kwargs):
+        self.prepare_insert_relation(eid_from, rtype, eid_to, **kwargs)
+
 # the import controller ########################################################
 
 class CWImportController(object):
@@ -771,7 +823,10 @@
         cnx.read_security = False
         cnx.write_security = False
 
-    def create_entity(self, etype, **kwargs):
+    def prepare_insert_entity(self, etype, **kwargs):
+        """Given an entity type, attributes and inlined relations, returns the inserted entity's
+        eid.
+        """
         for k, v in kwargs.iteritems():
             kwargs[k] = getattr(v, 'eid', v)
         entity, rels = self.metagen.base_etype_dicts(etype)
@@ -798,9 +853,15 @@
                 self.add_relation(cnx, entity.eid, rtype, targeteids,
                                   inlined, **kwargs)
         self._nb_inserted_entities += 1
-        return entity
+        return entity.eid
+
+    # XXX: prepare_update_entity is inherited from RQLObjectStore; it should be reimplemented to
+    # actually skip hooks, as prepare_insert_entity does
 
-    def relate(self, eid_from, rtype, eid_to, **kwargs):
+    def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
+        """Insert into the database a  relation ``rtype`` between entities with eids ``eid_from``
+        and ``eid_to``.
+        """
         assert not rtype.startswith('reverse_')
         self.add_relation(self._cnx, eid_from, rtype, eid_to,
                           self.rschema(rtype).inlined)
--- a/dataimport/test/unittest_dataimport.py	Tue Jun 23 13:08:48 2015 +0200
+++ b/dataimport/test/unittest_dataimport.py	Wed Jun 24 23:23:57 2015 +0200
@@ -14,18 +14,34 @@
     def test_all(self):
         with self.admin_access.repo_cnx() as cnx:
             store = dataimport.RQLObjectStore(cnx)
-            group_eid = store.create_entity('CWGroup', name=u'grp').eid
-            user_eid = store.create_entity('CWUser', login=u'lgn', upassword=u'pwd').eid
+            # Check data insertion
+            group_eid = store.prepare_insert_entity('CWGroup', name=u'grp')
+            user_eid = store.prepare_insert_entity('CWUser', login=u'lgn',
+                                                   upassword=u'pwd')
             store.relate(user_eid, 'in_group', group_eid)
             cnx.commit()
-
-        with self.admin_access.repo_cnx() as cnx:
             users = cnx.execute('CWUser X WHERE X login "lgn"')
             self.assertEqual(1, len(users))
             self.assertEqual(user_eid, users.one().eid)
             groups = cnx.execute('CWGroup X WHERE U in_group X, U login "lgn"')
             self.assertEqual(1, len(users))
             self.assertEqual(group_eid, groups.one().eid)
+            # Check data update
+            self.set_description('Check data update')
+            store.prepare_update_entity('CWGroup', group_eid, name=u'new_grp')
+            cnx.commit()
+            group = cnx.execute('CWGroup X WHERE X name "grp"')
+            self.assertEqual(len(group), 0)
+            group = cnx.execute('CWGroup X WHERE X name "new_grp"')
+            self.assertEqual(len(group), 1)
+            # Check data update with wrong type
+            with self.assertRaises(AssertionError):
+                store.prepare_update_entity('CWUser', group_eid, name=u'new_user')
+            cnx.commit()
+            group = cnx.execute('CWGroup X WHERE X name "new_user"')
+            self.assertEqual(len(group), 0)
+            group = cnx.execute('CWGroup X WHERE X name "new_grp"')
+            self.assertEqual(len(group), 1)
 
 
 class CreateCopyFromBufferTC(TestCase):
--- a/doc/book/en/devrepo/dataimport.rst	Tue Jun 23 13:08:48 2015 +0200
+++ b/doc/book/en/devrepo/dataimport.rst	Wed Jun 24 23:23:57 2015 +0200
@@ -5,29 +5,54 @@
 Dataimport
 ==========
 
-*CubicWeb* is designed to manipulate huge of amount of data, and provides helper functions to do so.
-These functions insert data within different levels of the *CubicWeb* API,
-allowing different speed/security tradeoffs. Those keeping all the *CubicWeb* hooks
-and security will be slower but the possible errors in insertion
-(bad data types, integrity error, ...) will be raised.
+*CubicWeb* is designed to manipulate huge amounts of data, and provides utilities to do so.  They
+allow inserting data at different levels of the *CubicWeb* API, offering different
+speed/security tradeoffs. Those keeping all the *CubicWeb* hooks and security will be slower but the
+possible errors in insertion (bad data types, integrity error, ...) will be raised.
 
-These dataimport function are provided in the file `dataimport.py`.
+These data import utilities are provided in the package `cubicweb.dataimport`.
 
 All the stores have the following API::
 
-    >>> store = ObjectStore()
-    >>> user = store.create_entity('CWUser', login=u'johndoe')
-    >>> group = store.create_entity('CWUser', name=u'unknown')
-    >>> store.relate(user.eid, 'in_group', group.eid)
+    >>> user_eid = store.prepare_insert_entity('CWUser', login=u'johndoe')
+    >>> group_eid = store.prepare_insert_entity('CWGroup', name=u'unknown')
+    >>> store.prepare_insert_relation(user_eid, 'in_group', group_eid)
+    >>> store.flush()
+    >>> store.commit()
+    >>> store.finish()
+
+Some stores **require a flush** to copy data into the database, so if you want store-independent
+code you should call `flush()` explicitly. (There may be multiple flushes during the
+process, or only one at the end if there is no memory issue). This is different from the
+commit, which validates the database transaction. Finally, the `finish()` method should be called in
+case the store requires additional work once everything is done.
 
+* ``prepare_insert_entity(<entity type>, **kwargs) -> eid``: given an entity
+  type, attributes and inlined relations, return the eid of the entity to be
+  inserted, *with no guarantee that anything has been inserted in database*.
+
+* ``prepare_update_entity(<entity type>, eid, **kwargs) -> None``: given an
+  entity type and eid, promise to update the given attributes and inlined
+  relations, *with no guarantee that anything has been updated in database*.
+
+* ``prepare_insert_relation(eid_from, rtype, eid_to) -> None``: indicate that a
+  relation ``rtype`` should be added between entities with eids ``eid_from``
+  and ``eid_to``. Similar to ``prepare_insert_entity()``, *there is no
+  guarantee that the relation has been inserted in database*.
+
+* ``flush() -> None``: flush any temporary data to database. May be called
+  several times during an import.
+
+* ``commit() -> None``: commit the database transaction.
+
+* ``finish() -> None``: additional stuff to do after import is terminated.
 
 ObjectStore
 -----------
 
-This store keeps objects in memory for *faster* validation. It may be useful
-in development mode. However, as it will not enforce the constraints of the schema,
-it may miss some problems.
-
+This store keeps objects in memory for *faster* validation. It may be useful in development
+mode. However, as it neither enforces the constraints of the schema nor inserts anything in the
+database, it may miss some problems.
 
 
 RQLObjectStore
@@ -48,11 +73,3 @@
 This store relies on *COPY FROM*/execute many sql commands to directly push data using SQL commands
 rather than using the whole *CubicWeb* API. For now, **it only works with PostgresSQL** as it requires
 the *COPY FROM* command.
-
-The API is similar to the other stores, but **it requires a flush** after some imports to copy data
-in the database (these flushes may be multiples through the processes, or be done only once at the
-end if there is no memory issue)::
-
-    >>> store = SQLGenObjectStore(session)
-    >>> store.create_entity('Person', ...)
-    >>> store.flush()