# HG changeset patch # User Yann Voté # Date 1435181037 -7200 # Node ID 1f5026e7d848ec6ec0c95cde6f321233d5bd5500 # Parent e7ee508a8b2f4b074bd9b1a3b5eae8e6c86ba6a4 [dataimport] Move stores to new API. Here is the final store API: * ``prepare_insert_entity(etype, **kwargs) -> eid``: given an entity type, attributes and inlined relations, return the eid of the entity to be inserted, *with no guarantee that anything has been inserted in database*, * ``prepare_update_entity(etype, eid, **kwargs) -> None``: given an entity type and eid, promise to update the given attributes and inlined relations, *with no guarantee that anything has been updated in database*, * ``prepare_insert_relation(eid_from, rtype, eid_to) -> None``: indicate that a relation ``rtype`` should be added between entities with eids ``eid_from`` and ``eid_to``. Similarly to ``prepare_insert_entity()``, *there is no guarantee that the relation will be inserted in database*, * ``flush() -> None``: flush any temporary data to database. May be called several times during an import, * ``finish() -> None``: additional stuff to do after import is terminated. **Warning:** ``prepare_update_entity()`` still needs to be implemented for NoHookRQLObjectStore.
Related to #5040344 diff -r e7ee508a8b2f -r 1f5026e7d848 dataimport/__init__.py --- a/dataimport/__init__.py Tue Jun 23 13:08:48 2015 +0200 +++ b/dataimport/__init__.py Wed Jun 24 23:23:57 2015 +0200 @@ -37,10 +37,10 @@ entity = mk_entity(row, USERS) entity['upassword'] = 'motdepasse' ctl.check('login', entity['login'], None) - entity = ctl.store.create_entity('CWUser', **entity) - email = ctl.store.create_entity('EmailAddress', address=row['email']) - ctl.store.relate(entity.eid, 'use_email', email.eid) - ctl.store.rql('SET U in_group G WHERE G name "users", U eid %(x)s', {'x':entity['eid']}) + entity = ctl.store.prepare_insert_entity('CWUser', **entity) + email = ctl.store.prepare_insert_entity('EmailAddress', address=row['email']) + ctl.store.prepare_insert_relation(entity, 'use_email', email) + ctl.store.rql('SET U in_group G WHERE G name "users", U eid %(x)s', {'x': entity}) CHK = [('login', check_doubles, 'Utilisateurs Login', 'Deux utilisateurs ne devraient pas avoir le même login.'), @@ -543,9 +543,9 @@ But it will not enforce the constraints of the schema and hence will miss some problems >>> store = ObjectStore() - >>> user = store.create_entity('CWUser', login=u'johndoe') - >>> group = store.create_entity('CWUser', name=u'unknown') - >>> store.relate(user.eid, 'in_group', group.eid) + >>> user = store.prepare_insert_entity('CWUser', login=u'johndoe') + >>> group = store.prepare_insert_entity('CWUser', name=u'unknown') + >>> store.prepare_insert_relation(user, 'in_group', group) """ def __init__(self): self.items = [] @@ -554,26 +554,43 @@ self.relations = set() self.indexes = {} - def create_entity(self, etype, **data): + def prepare_insert_entity(self, etype, **data): + """Given an entity type, attributes and inlined relations, return an eid for the entity that + would be inserted with a real store. 
+ """ data = attrdict(data) data['eid'] = eid = len(self.items) self.items.append(data) self.eids[eid] = data self.types.setdefault(etype, []).append(eid) - return data + return eid - def relate(self, eid_from, rtype, eid_to, **kwargs): - """Add new relation""" + def prepare_update_entity(self, etype, eid, **kwargs): + """Given an entity type and eid, updates the corresponding fake entity with specified + attributes and inlined relations. + """ + assert eid in self.types.get(etype, ()), 'Trying to update with wrong type {}'.format(etype) + data = self.eids[eid] + data.update(kwargs) + + def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs): + """Store into the `relations` attribute that a relation ``rtype`` exists between entities + with eids ``eid_from`` and ``eid_to``. + """ relation = eid_from, rtype, eid_to self.relations.add(relation) return relation + def flush(self): + """Nothing to flush for this store.""" + pass + def commit(self): - """this commit method does nothing by default""" + """Nothing to commit for this store.""" return - def flush(self): - """The method is provided so that all stores share a common API""" + def finish(self): + """Nothing to do once import is terminated for this store.""" pass @property @@ -586,6 +603,16 @@ def nb_inserted_relations(self): return len(self.relations) + @deprecated('[3.21] use prepare_insert_entity instead') + def create_entity(self, etype, **data): + eid = self.prepare_insert_entity(etype, **data) + return self.eids[eid] + + @deprecated('[3.21] use prepare_insert_relation instead') + def relate(self, eid_from, rtype, eid_to, **kwargs): + return self.prepare_insert_relation(eid_from, rtype, eid_to, **kwargs) + + class RQLObjectStore(ObjectStore): """ObjectStore that works with an actual RQL repository (production mode)""" @@ -599,29 +626,45 @@ self._commit = commit or cnx.commit def commit(self): + """Commit the database transaction.""" return self._commit() def rql(self, *args): return self._cnx.execute(*args) + def 
prepare_insert_entity(self, *args, **kwargs): + """Given an entity type, attributes and inlined relations, returns the inserted entity's + eid. + """ + entity = self._cnx.create_entity(*args, **kwargs) + self.eids[entity.eid] = entity + self.types.setdefault(args[0], []).append(entity.eid) + return entity.eid + + def prepare_update_entity(self, etype, eid, **kwargs): + """Given an entity type and eid, updates the corresponding entity with specified attributes + and inlined relations. + """ + entity = self._cnx.entity_from_eid(eid) + assert entity.cw_etype == etype, 'Trying to update with wrong type {}'.format(etype) + # XXX some inlined relations may already exists + entity.cw_set(**kwargs) + + def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs): + """Insert into the database a relation ``rtype`` between entities with eids ``eid_from`` + and ``eid_to``. + """ + eid_from, rtype, eid_to = super(RQLObjectStore, self).prepare_insert_relation( + eid_from, rtype, eid_to, **kwargs) + self.rql('SET X %s Y WHERE X eid %%(x)s, Y eid %%(y)s' % rtype, + {'x': int(eid_from), 'y': int(eid_to)}) + @property def session(self): warnings.warn('[3.19] deprecated property.', DeprecationWarning, stacklevel=2) return self._cnx.repo._get_session(self._cnx.sessionid) - def create_entity(self, *args, **kwargs): - entity = self._cnx.create_entity(*args, **kwargs) - self.eids[entity.eid] = entity - self.types.setdefault(args[0], []).append(entity.eid) - return entity - - def relate(self, eid_from, rtype, eid_to, **kwargs): - eid_from, rtype, eid_to = super(RQLObjectStore, self).relate( - eid_from, rtype, eid_to, **kwargs) - self.rql('SET X %s Y WHERE X eid %%(x)s, Y eid %%(y)s' % rtype, - {'x': int(eid_from), 'y': int(eid_to)}) - @deprecated("[3.19] use cnx.find(*args, **kwargs).entities() instead") def find_entities(self, *args, **kwargs): return self._cnx.find(*args, **kwargs).entities() @@ -630,6 +673,15 @@ def find_one_entity(self, *args, **kwargs): return 
self._cnx.find(*args, **kwargs).one() + @deprecated('[3.21] use prepare_insert_entity instead') + def create_entity(self, *args, **kwargs): + eid = self.prepare_insert_entity(*args, **kwargs) + return self._cnx.entity_from_eid(eid) + + @deprecated('[3.21] use prepare_insert_relation instead') + def relate(self, eid_from, rtype, eid_to, **kwargs): + self.prepare_insert_relation(eid_from, rtype, eid_to, **kwargs) + # the import controller ######################################################## class CWImportController(object): @@ -771,7 +823,10 @@ cnx.read_security = False cnx.write_security = False - def create_entity(self, etype, **kwargs): + def prepare_insert_entity(self, etype, **kwargs): + """Given an entity type, attributes and inlined relations, returns the inserted entity's + eid. + """ for k, v in kwargs.iteritems(): kwargs[k] = getattr(v, 'eid', v) entity, rels = self.metagen.base_etype_dicts(etype) @@ -798,9 +853,15 @@ self.add_relation(cnx, entity.eid, rtype, targeteids, inlined, **kwargs) self._nb_inserted_entities += 1 - return entity + return entity.eid + + # XXX: prepare_update_entity is inherited from RQLObjectStore, it should be reimplemented to + # actually skip hooks as prepare_insert_entity - def relate(self, eid_from, rtype, eid_to, **kwargs): + def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs): + """Insert into the database a relation ``rtype`` between entities with eids ``eid_from`` + and ``eid_to``. 
+ """ assert not rtype.startswith('reverse_') self.add_relation(self._cnx, eid_from, rtype, eid_to, self.rschema(rtype).inlined) diff -r e7ee508a8b2f -r 1f5026e7d848 dataimport/test/unittest_dataimport.py --- a/dataimport/test/unittest_dataimport.py Tue Jun 23 13:08:48 2015 +0200 +++ b/dataimport/test/unittest_dataimport.py Wed Jun 24 23:23:57 2015 +0200 @@ -14,18 +14,34 @@ def test_all(self): with self.admin_access.repo_cnx() as cnx: store = dataimport.RQLObjectStore(cnx) - group_eid = store.create_entity('CWGroup', name=u'grp').eid - user_eid = store.create_entity('CWUser', login=u'lgn', upassword=u'pwd').eid + # Check data insertion + group_eid = store.prepare_insert_entity('CWGroup', name=u'grp') + user_eid = store.prepare_insert_entity('CWUser', login=u'lgn', + upassword=u'pwd') store.relate(user_eid, 'in_group', group_eid) cnx.commit() - - with self.admin_access.repo_cnx() as cnx: users = cnx.execute('CWUser X WHERE X login "lgn"') self.assertEqual(1, len(users)) self.assertEqual(user_eid, users.one().eid) groups = cnx.execute('CWGroup X WHERE U in_group X, U login "lgn"') self.assertEqual(1, len(users)) self.assertEqual(group_eid, groups.one().eid) + # Check data update + self.set_description('Check data update') + store.prepare_update_entity('CWGroup', group_eid, name=u'new_grp') + cnx.commit() + group = cnx.execute('CWGroup X WHERE X name "grp"') + self.assertEqual(len(group), 0) + group = cnx.execute('CWGroup X WHERE X name "new_grp"') + self.assertEqual(len(group), 1) + # Check data update with wrong type + with self.assertRaises(AssertionError): + store.prepare_update_entity('CWUser', group_eid, name=u'new_user') + cnx.commit() + group = cnx.execute('CWGroup X WHERE X name "new_user"') + self.assertEqual(len(group), 0) + group = cnx.execute('CWGroup X WHERE X name "new_grp"') + self.assertEqual(len(group), 1) class CreateCopyFromBufferTC(TestCase): diff -r e7ee508a8b2f -r 1f5026e7d848 doc/book/en/devrepo/dataimport.rst --- 
a/doc/book/en/devrepo/dataimport.rst Tue Jun 23 13:08:48 2015 +0200 +++ b/doc/book/en/devrepo/dataimport.rst Wed Jun 24 23:23:57 2015 +0200 @@ -5,29 +5,54 @@ Dataimport ========== -*CubicWeb* is designed to manipulate huge of amount of data, and provides helper functions to do so. -These functions insert data within different levels of the *CubicWeb* API, -allowing different speed/security tradeoffs. Those keeping all the *CubicWeb* hooks -and security will be slower but the possible errors in insertion -(bad data types, integrity error, ...) will be raised. +*CubicWeb* is designed to manipulate huge amounts of data, and provides utilities to do so. They +allow inserting data at different levels of the *CubicWeb* API, with different +speed/security tradeoffs. Those keeping all the *CubicWeb* hooks and security will be slower but the +possible errors in insertion (bad data types, integrity error, ...) will be raised. -These dataimport function are provided in the file `dataimport.py`. +These data import utilities are provided in the package `cubicweb.dataimport`. All the stores have the following API:: - >>> store = ObjectStore() - >>> user = store.create_entity('CWUser', login=u'johndoe') - >>> group = store.create_entity('CWUser', name=u'unknown') - >>> store.relate(user.eid, 'in_group', group.eid) + >>> user_eid = store.prepare_insert_entity('CWUser', login=u'johndoe') + >>> group_eid = store.prepare_insert_entity('CWGroup', name=u'unknown') + >>> store.prepare_insert_relation(user_eid, 'in_group', group_eid) + >>> store.flush() + >>> store.commit() + >>> store.finish() + +Some stores **require a flush** to copy data in the database, so if you want to have store +independent code you should explicitly call it. (There may be multiple flushes during the +process, or only one at the end if there is no memory issue). This is different from the +commit which validates the database transaction. 
Finally, the `finish()` method should be called in +case the store requires additional work once everything is done. +* ``prepare_insert_entity(etype, **kwargs) -> eid``: given an entity + type, attributes and inlined relations, return the eid of the entity to be + inserted, *with no guarantee that anything has been inserted in database*. + +* ``prepare_update_entity(etype, eid, **kwargs) -> None``: given an + entity type and eid, promise to update the given attributes and inlined + relations *with no guarantee that anything has been updated in database*. + +* ``prepare_insert_relation(eid_from, rtype, eid_to) -> None``: indicate that a + relation ``rtype`` should be added between entities with eids ``eid_from`` + and ``eid_to``. Similar to ``prepare_insert_entity()``, *there is no + guarantee that the relation has been inserted in database*. + +* ``flush() -> None``: flush any temporary data to database. May be called + several times during an import. + +* ``commit() -> None``: commit the database transaction. + +* ``finish() -> None``: additional stuff to do after import is terminated. ObjectStore ----------- -This store keeps objects in memory for *faster* validation. It may be useful -in development mode. However, as it will not enforce the constraints of the schema, -it may miss some problems. - +This store keeps objects in memory for *faster* validation. It may be useful in development +mode. However, as it will neither enforce the constraints of the schema nor insert anything in the +database, it may miss some problems. RQLObjectStore @@ -48,11 +73,3 @@ This store relies on *COPY FROM*/execute many sql commands to directly push data using SQL commands rather than using the whole *CubicWeb* API. For now, **it only works with PostgresSQL** as it requires the *COPY FROM* command.
- -The API is similar to the other stores, but **it requires a flush** after some imports to copy data -in the database (these flushes may be multiples through the processes, or be done only once at the -end if there is no memory issue):: - - >>> store = SQLGenObjectStore(session) - >>> store.create_entity('Person', ...) - >>> store.flush()