--- a/dataimport/__init__.py Tue Jun 23 13:08:48 2015 +0200
+++ b/dataimport/__init__.py Wed Jun 24 23:23:57 2015 +0200
@@ -37,10 +37,10 @@
entity = mk_entity(row, USERS)
entity['upassword'] = 'motdepasse'
ctl.check('login', entity['login'], None)
- entity = ctl.store.create_entity('CWUser', **entity)
- email = ctl.store.create_entity('EmailAddress', address=row['email'])
- ctl.store.relate(entity.eid, 'use_email', email.eid)
- ctl.store.rql('SET U in_group G WHERE G name "users", U eid %(x)s', {'x':entity['eid']})
+ entity = ctl.store.prepare_insert_entity('CWUser', **entity)
+ email = ctl.store.prepare_insert_entity('EmailAddress', address=row['email'])
+ ctl.store.prepare_insert_relation(entity, 'use_email', email)
+ ctl.store.rql('SET U in_group G WHERE G name "users", U eid %(x)s', {'x': entity})
CHK = [('login', check_doubles, 'Utilisateurs Login',
'Deux utilisateurs ne devraient pas avoir le même login.'),
@@ -543,9 +543,9 @@
But it will not enforce the constraints of the schema and hence will miss some problems
>>> store = ObjectStore()
- >>> user = store.create_entity('CWUser', login=u'johndoe')
- >>> group = store.create_entity('CWUser', name=u'unknown')
- >>> store.relate(user.eid, 'in_group', group.eid)
+ >>> user = store.prepare_insert_entity('CWUser', login=u'johndoe')
+ >>> group = store.prepare_insert_entity('CWUser', name=u'unknown')
+ >>> store.prepare_insert_relation(user, 'in_group', group)
"""
def __init__(self):
self.items = []
@@ -554,26 +554,43 @@
self.relations = set()
self.indexes = {}
- def create_entity(self, etype, **data):
+ def prepare_insert_entity(self, etype, **data):
+ """Given an entity type, attributes and inlined relations, return an eid for the entity that
+ would be inserted with a real store.
+ """
data = attrdict(data)
data['eid'] = eid = len(self.items)
self.items.append(data)
self.eids[eid] = data
self.types.setdefault(etype, []).append(eid)
- return data
+ return eid
- def relate(self, eid_from, rtype, eid_to, **kwargs):
- """Add new relation"""
+ def prepare_update_entity(self, etype, eid, **kwargs):
+ """Given an entity type and eid, update the corresponding fake entity with the specified
+ attributes and inlined relations.
+
+ The type assertion fails when ``eid`` was not registered under ``etype``, including when
+ no entity of that type has been inserted at all.
+ """
+ # .get() so that an unknown etype trips the descriptive assertion instead of a bare KeyError
+ assert eid in self.types.get(etype, ()), 'Trying to update with wrong type {}'.format(etype)
+ data = self.eids[eid]
+ data.update(kwargs)
+
+ def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
+ """Store into the `relations` attribute that a relation ``rtype`` exists between entities
+ with eids ``eid_from`` and ``eid_to``.
+ """
relation = eid_from, rtype, eid_to
self.relations.add(relation)
return relation
+ def flush(self):
+ """No-op kept so that all stores share the same API: this store has nothing to flush."""
+
def commit(self):
- """this commit method does nothing by default"""
+ """Nothing to commit for this store."""
return
- def flush(self):
- """The method is provided so that all stores share a common API"""
+ def finish(self):
+ """Nothing to do once import is terminated for this store."""
pass
@property
@@ -586,6 +603,16 @@
def nb_inserted_relations(self):
return len(self.relations)
+ @deprecated('[3.21] use prepare_insert_entity instead')
+ def create_entity(self, etype, **data):
+ """Deprecated alias of `prepare_insert_entity` returning the stored fake entity.
+
+ `prepare_insert_entity` returns only the eid; this returns the dict-like object
+ registered in ``self.eids``, so legacy callers can still read its ``eid``.
+ """
+ eid = self.prepare_insert_entity(etype, **data)
+ # hand back the stored object (it carries 'eid'), not a fresh copy of the input attributes
+ return self.eids[eid]
+
+ @deprecated('[3.21] use prepare_insert_relation instead')
+ def relate(self, eid_from, rtype, eid_to, **kwargs):
+ """Deprecated alias of `prepare_insert_relation`.
+
+ Returns whatever `prepare_insert_relation` returns; for this in-memory store that is the
+ ``(eid_from, rtype, eid_to)`` tuple, as the former ``relate`` method did.
+ """
+ return self.prepare_insert_relation(eid_from, rtype, eid_to, **kwargs)
+
+
class RQLObjectStore(ObjectStore):
"""ObjectStore that works with an actual RQL repository (production mode)"""
@@ -599,29 +626,45 @@
self._commit = commit or cnx.commit
def commit(self):
+ """Commit the database transaction."""
return self._commit()
def rql(self, *args):
return self._cnx.execute(*args)
+ def prepare_insert_entity(self, *args, **kwargs):
+ """Create an entity through the connection and return its eid.
+
+ Arguments are forwarded untouched to ``cnx.create_entity``; the created entity is
+ recorded in the ``eids`` and ``types`` indexes of the store.
+ """
+ created = self._cnx.create_entity(*args, **kwargs)
+ new_eid = created.eid
+ self.eids[new_eid] = created
+ # the entity type is expected as first positional argument
+ etype = args[0]
+ self.types.setdefault(etype, []).append(new_eid)
+ return new_eid
+
+ def prepare_update_entity(self, etype, eid, **kwargs):
+ """Update the entity of type ``etype`` and eid ``eid`` with the given attributes and
+ inlined relations.
+
+ The entity is fetched through the connection and its type is asserted to be ``etype``
+ before the update is applied.
+ """
+ target = self._cnx.entity_from_eid(eid)
+ assert target.cw_etype == etype, 'Trying to update with wrong type {}'.format(etype)
+ # XXX some inlined relations may already exist
+ target.cw_set(**kwargs)
+
+ def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
+ """Insert into the database a relation ``rtype`` between the entities with eids
+ ``eid_from`` and ``eid_to``.
+ """
+ # the parent class records the relation in ``self.relations`` and hands the triple back
+ subj, relname, obj = super(RQLObjectStore, self).prepare_insert_relation(
+ eid_from, rtype, eid_to, **kwargs)
+ # the relation type is interpolated in the query, the eids are passed as query arguments
+ query = 'SET X %s Y WHERE X eid %%(x)s, Y eid %%(y)s' % relname
+ self.rql(query, {'x': int(subj), 'y': int(obj)})
+
@property
def session(self):
warnings.warn('[3.19] deprecated property.', DeprecationWarning,
stacklevel=2)
return self._cnx.repo._get_session(self._cnx.sessionid)
- def create_entity(self, *args, **kwargs):
- entity = self._cnx.create_entity(*args, **kwargs)
- self.eids[entity.eid] = entity
- self.types.setdefault(args[0], []).append(entity.eid)
- return entity
-
- def relate(self, eid_from, rtype, eid_to, **kwargs):
- eid_from, rtype, eid_to = super(RQLObjectStore, self).relate(
- eid_from, rtype, eid_to, **kwargs)
- self.rql('SET X %s Y WHERE X eid %%(x)s, Y eid %%(y)s' % rtype,
- {'x': int(eid_from), 'y': int(eid_to)})
-
@deprecated("[3.19] use cnx.find(*args, **kwargs).entities() instead")
def find_entities(self, *args, **kwargs):
return self._cnx.find(*args, **kwargs).entities()
@@ -630,6 +673,15 @@
def find_one_entity(self, *args, **kwargs):
return self._cnx.find(*args, **kwargs).one()
+ @deprecated('[3.21] use prepare_insert_entity instead')
+ def create_entity(self, *args, **kwargs):
+ """Deprecated alias of `prepare_insert_entity` returning the entity rather than its eid.
+
+ The entity is looked up from the connection using the eid of the inserted entity.
+ """
+ return self._cnx.entity_from_eid(self.prepare_insert_entity(*args, **kwargs))
+
+ @deprecated('[3.21] use prepare_insert_relation instead')
+ def relate(self, eid_from, rtype, eid_to, **kwargs):
+ """Deprecated alias that forwards to `prepare_insert_relation`; returns nothing."""
+ self.prepare_insert_relation(eid_from, rtype, eid_to, **kwargs)
+
# the import controller ########################################################
class CWImportController(object):
@@ -771,7 +823,10 @@
cnx.read_security = False
cnx.write_security = False
- def create_entity(self, etype, **kwargs):
+ def prepare_insert_entity(self, etype, **kwargs):
+ """Given an entity type, attributes and inlined relations, returns the inserted entity's
+ eid.
+ """
for k, v in kwargs.iteritems():
kwargs[k] = getattr(v, 'eid', v)
entity, rels = self.metagen.base_etype_dicts(etype)
@@ -798,9 +853,15 @@
self.add_relation(cnx, entity.eid, rtype, targeteids,
inlined, **kwargs)
self._nb_inserted_entities += 1
- return entity
+ return entity.eid
+
+ # XXX: prepare_update_entity is inherited from RQLObjectStore, it should be reimplemented to
+ # actually skip hooks as prepare_insert_entity
- def relate(self, eid_from, rtype, eid_to, **kwargs):
+ def prepare_insert_relation(self, eid_from, rtype, eid_to, **kwargs):
+ """Insert into the database a relation ``rtype`` between entities with eids ``eid_from``
+ and ``eid_to``.
+ """
assert not rtype.startswith('reverse_')
self.add_relation(self._cnx, eid_from, rtype, eid_to,
self.rschema(rtype).inlined)
--- a/doc/book/en/devrepo/dataimport.rst Tue Jun 23 13:08:48 2015 +0200
+++ b/doc/book/en/devrepo/dataimport.rst Wed Jun 24 23:23:57 2015 +0200
@@ -5,29 +5,54 @@
Dataimport
==========
-*CubicWeb* is designed to manipulate huge of amount of data, and provides helper functions to do so.
-These functions insert data within different levels of the *CubicWeb* API,
-allowing different speed/security tradeoffs. Those keeping all the *CubicWeb* hooks
-and security will be slower but the possible errors in insertion
-(bad data types, integrity error, ...) will be raised.
+*CubicWeb* is designed to manipulate huge amounts of data, and provides utilities to do so. They
+allow inserting data at different levels of the *CubicWeb* API, offering different
+speed/security tradeoffs. Those keeping all the *CubicWeb* hooks and security will be slower but the
+possible errors in insertion (bad data types, integrity error, ...) will be raised.
-These dataimport function are provided in the file `dataimport.py`.
+These data import utilities are provided in the package `cubicweb.dataimport`.
All the stores have the following API::
- >>> store = ObjectStore()
- >>> user = store.create_entity('CWUser', login=u'johndoe')
- >>> group = store.create_entity('CWUser', name=u'unknown')
- >>> store.relate(user.eid, 'in_group', group.eid)
+ >>> user_eid = store.prepare_insert_entity('CWUser', login=u'johndoe')
+ >>> group_eid = store.prepare_insert_entity('CWGroup', name=u'unknown')
+ >>> store.prepare_insert_relation(user_eid, 'in_group', group_eid)
+ >>> store.flush()
+ >>> store.commit()
+ >>> store.finish()
+
+Some stores **require a flush** to copy data into the database, so if you want store-independent
+code you should call it explicitly. (There may be multiple flushes during the
+process, or only one at the end if there is no memory issue). This is different from the
+commit, which validates the database transaction. Finally, the `finish()` method should be called in
+case the store requires additional work once everything is done.
+* ``prepare_insert_entity(<entity type>, **kwargs) -> eid``: given an entity
+ type, attributes and inlined relations, return the eid of the entity to be
+ inserted, *with no guarantee that anything has been inserted in database*.
+
+* ``prepare_update_entity(<entity type>, eid, **kwargs) -> None``: given an
+ entity type and eid, promise to update the given attributes and inlined
+ relations, *with no guarantee that anything has been updated in database*.
+
+* ``prepare_insert_relation(eid_from, rtype, eid_to) -> None``: indicate that a
+ relation ``rtype`` should be added between entities with eids ``eid_from``
+ and ``eid_to``. Similar to ``prepare_insert_entity()``, *there is no
+ guarantee that the relation has been inserted in database*.
+
+* ``flush() -> None``: flush any temporary data to database. May be called
+ several times during an import.
+
+* ``commit() -> None``: commit the database transaction.
+
+* ``finish() -> None``: additional stuff to do after import is terminated.
ObjectStore
-----------
-This store keeps objects in memory for *faster* validation. It may be useful
-in development mode. However, as it will not enforce the constraints of the schema,
-it may miss some problems.
-
+This store keeps objects in memory for *faster* validation. It may be useful in development
+mode. However, as it neither enforces the constraints of the schema nor inserts anything in the
+database, it may miss some problems.
RQLObjectStore
@@ -48,11 +73,3 @@
This store relies on *COPY FROM*/execute many sql commands to directly push data using SQL commands
rather than using the whole *CubicWeb* API. For now, **it only works with PostgresSQL** as it requires
the *COPY FROM* command.
-
-The API is similar to the other stores, but **it requires a flush** after some imports to copy data
-in the database (these flushes may be multiples through the processes, or be done only once at the
-end if there is no memory issue)::
-
- >>> store = SQLGenObjectStore(session)
- >>> store.create_entity('Person', ...)
- >>> store.flush()