[server] improve the speed of setting relations between entities (closes #1625257)
The main idea is to add methods equivalent to session.add_relation and
repository.glob_add_relation which handle several relations in one call. Speed
gain results from:
* using cursor.executemany to run SQL statements
* factorizing some code which otherwise has to be performed for each relation,
such as context manager creation before calling hooks or to enable security,
creation of the EditedEntity instance (when several inlined relations are set
on a single entity, and consequently when refreshing the cached entity)
benchmark runs 1.1x faster for non inlined entities and 125x faster for inlined
entities :-)
--- a/hooks/metadata.py Fri Apr 15 16:05:20 2011 +0200
+++ b/hooks/metadata.py Fri Apr 15 15:42:17 2011 +0200
@@ -67,13 +67,12 @@
def precommit_event(self):
session = self.session
- for eid in self.get_data():
- if session.deleted_in_transaction(eid):
- # entity have been created and deleted in the same transaction
- continue
- entity = session.entity_from_eid(eid)
- if not entity.created_by:
- session.add_relation(eid, 'created_by', session.user.eid)
+ relations = [(eid, session.user.eid) for eid in self.get_data()
+ # don't consider entities that have been created and
+ # deleted in the same transaction
+ if not session.deleted_in_transaction(eid) and \
+ not session.entity_from_eid(eid).created_by]
+ session.add_relations([('created_by', relations)])
class SetOwnershipHook(MetaDataHook):
--- a/server/hook.py Fri Apr 15 16:05:20 2011 +0200
+++ b/server/hook.py Fri Apr 15 15:42:17 2011 +0200
@@ -270,13 +270,17 @@
'session_open', 'session_close'))
ALL_HOOKS = ENTITIES_HOOKS | RELATIONS_HOOKS | SYSTEM_HOOKS
-def _iter_kwargs(entities, kwargs):
- if not entities:
+def _iter_kwargs(entities, eids_from_to, kwargs):
+ if not entities and not eids_from_to:
yield kwargs
- else:
+ elif entities:
for entity in entities:
kwargs['entity'] = entity
yield kwargs
+ else:
+ for subject, object in eids_from_to:
+ kwargs.update({'eidfrom': subject, 'eidto': object})
+ yield kwargs
class HooksRegistry(CWRegistry):
@@ -304,12 +308,19 @@
if 'entities' in kwargs:
assert 'entity' not in kwargs, \
'can\'t pass "entities" and "entity" arguments simultaneously'
+ assert 'eids_from_to' not in kwargs, \
+ 'can\'t pass "entities" and "eids_from_to" arguments simultaneously'
entities = kwargs.pop('entities')
+ eids_from_to = []
+ elif 'eids_from_to' in kwargs:
+ entities = []
+ eids_from_to = kwargs.pop('eids_from_to')
else:
entities = []
+ eids_from_to = []
# by default, hooks are executed with security turned off
with security_enabled(session, read=False):
- for _kwargs in _iter_kwargs(entities, kwargs):
+ for _kwargs in _iter_kwargs(entities, eids_from_to, kwargs):
hooks = sorted(self.possible_objects(session, **_kwargs),
key=lambda x: x.order)
with security_enabled(session, write=False):
--- a/server/querier.py Fri Apr 15 16:05:20 2011 +0200
+++ b/server/querier.py Fri Apr 15 15:42:17 2011 +0200
@@ -556,6 +556,8 @@
def insert_relation_defs(self):
session = self.session
repo = session.repo
+ edited_entities = {}
+ relations = {}
for subj, rtype, obj in self.relation_defs():
# if a string is given into args instead of an int, we get it here
if isinstance(subj, basestring):
@@ -567,12 +569,21 @@
elif not isinstance(obj, (int, long)):
obj = obj.entity.eid
if repo.schema.rschema(rtype).inlined:
- entity = session.entity_from_eid(subj)
- edited = EditedEntity(entity)
+ if subj not in edited_entities:
+ entity = session.entity_from_eid(subj)
+ edited = EditedEntity(entity)
+ edited_entities[subj] = edited
+ else:
+ edited = edited_entities[subj]
edited.edited_attribute(rtype, obj)
- repo.glob_update_entity(session, edited)
else:
- repo.glob_add_relation(session, subj, rtype, obj)
+ if rtype in relations:
+ relations[rtype].append((subj, obj))
+ else:
+ relations[rtype] = [(subj, obj)]
+ repo.glob_add_relations(session, relations)
+ for edited in edited_entities.itervalues():
+ repo.glob_update_entity(session, edited)
class QuerierHelper(object):
--- a/server/repository.py Fri Apr 15 16:05:20 2011 +0200
+++ b/server/repository.py Fri Apr 15 15:42:17 2011 +0200
@@ -1375,19 +1375,37 @@
def glob_add_relation(self, session, subject, rtype, object):
"""add a relation to the repository"""
- if server.DEBUG & server.DBG_REPO:
- print 'ADD relation', subject, rtype, object
- source = self.locate_relation_source(session, subject, rtype, object)
- if source.should_call_hooks:
- del_existing_rel_if_needed(session, subject, rtype, object)
- self.hm.call_hooks('before_add_relation', session,
- eidfrom=subject, rtype=rtype, eidto=object)
- source.add_relation(session, subject, rtype, object)
- rschema = self.schema.rschema(rtype)
- session.update_rel_cache_add(subject, rtype, object, rschema.symmetric)
- if source.should_call_hooks:
- self.hm.call_hooks('after_add_relation', session,
- eidfrom=subject, rtype=rtype, eidto=object)
+ self.glob_add_relations(session, {rtype: [(subject, object)]})
+
+ def glob_add_relations(self, session, relations):
+ """add several relations to the repository
+
+ relations is a dictionary rtype: [(subj_eid, obj_eid), ...]
+ """
+ sources = {}
+ for rtype, eids_subj_obj in relations.iteritems():
+ if server.DEBUG & server.DBG_REPO:
+ for subject, object in relations:
+ print 'ADD relation', subject, rtype, object
+ for subject, object in eids_subj_obj:
+ source = self.locate_relation_source(session, subject, rtype, object)
+ if source in sources:
+ sources[source][1].append((subject, object))
+ else:
+ sources[source] = rtype, [(subject, object)]
+ for source, (rtype, source_relations) in sources.iteritems():
+ if source.should_call_hooks:
+ for subject, object in source_relations:
+ del_existing_rel_if_needed(session, subject, rtype, object)
+ self.hm.call_hooks('before_add_relation', session,
+ rtype=rtype, eids_from_to=source_relations)
+ source.add_relations(session, rtype, source_relations)
+ rschema = self.schema.rschema(rtype)
+ for subject, object in source_relations:
+ session.update_rel_cache_add(subject, rtype, object, rschema.symmetric)
+ if source.should_call_hooks:
+ self.hm.call_hooks('after_add_relation', session,
+ rtype=rtype, eids_from_to=source_relations)
def glob_delete_relation(self, session, subject, rtype, object):
"""delete a relation from the repository"""
--- a/server/session.py Fri Apr 15 16:05:20 2011 +0200
+++ b/server/session.py Fri Apr 15 15:42:17 2011 +0200
@@ -213,14 +213,34 @@
You may use this in hooks when you know both eids of the relation you
want to add.
"""
+ self.add_relations([(rtype, [(fromeid, toeid)])])
+
+ def add_relations(self, relations):
+ '''set many relation using a shortcut similar to the one in add_relation
+
+ relations is a list of 2-uples, the first element of each
+ 2-uple is the rtype, and the second is a list of (fromeid,
+ toeid) tuples
+ '''
+ edited_entities = {}
+ relations_dict = {}
with security_enabled(self, False, False):
- if self.vreg.schema[rtype].inlined:
- entity = self.entity_from_eid(fromeid)
- edited = EditedEntity(entity)
- edited.edited_attribute(rtype, toeid)
+ for rtype, eids in relations:
+ if self.vreg.schema[rtype].inlined:
+ for fromeid, toeid in eids:
+ if fromeid not in edited_entities:
+ entity = self.entity_from_eid(fromeid)
+ edited = EditedEntity(entity)
+ edited_entities[fromeid] = edited
+ else:
+ edited = edited_entities[fromeid]
+ edited.edited_attribute(rtype, toeid)
+ else:
+ relations_dict[rtype] = eids
+ self.repo.glob_add_relations(self, relations_dict)
+ for edited in edited_entities.itervalues():
self.repo.glob_update_entity(self, edited)
- else:
- self.repo.glob_add_relation(self, fromeid, rtype, toeid)
+
def delete_relation(self, fromeid, rtype, toeid):
"""provide direct access to the repository method to delete a relation.
--- a/server/sources/__init__.py Fri Apr 15 16:05:20 2011 +0200
+++ b/server/sources/__init__.py Fri Apr 15 15:42:17 2011 +0200
@@ -434,6 +434,13 @@
"""add a relation to the source"""
raise NotImplementedError()
+ def add_relations(self, session, rtype, subj_obj_list):
+ """add a relations to the source"""
+ # override in derived classes if you feel you can
+ # optimize
+ for subject, object in subj_obj_list:
+ self.add_relation(session, subject, rtype, object)
+
def delete_relation(self, session, subject, rtype, object):
"""delete a relation from the source"""
raise NotImplementedError()
--- a/server/sources/native.py Fri Apr 15 16:05:20 2011 +0200
+++ b/server/sources/native.py Fri Apr 15 15:42:17 2011 +0200
@@ -628,22 +628,32 @@
def add_relation(self, session, subject, rtype, object, inlined=False):
"""add a relation to the source"""
- self._add_relation(session, subject, rtype, object, inlined)
+ self._add_relations(session, rtype, [(subject, object)], inlined)
if session.undoable_action('A', rtype):
self._record_tx_action(session, 'tx_relation_actions', 'A',
eid_from=subject, rtype=rtype, eid_to=object)
- def _add_relation(self, session, subject, rtype, object, inlined=False):
+ def add_relations(self, session, rtype, subj_obj_list, inlined=False):
+ """add a relations to the source"""
+ self._add_relations(session, rtype, subj_obj_list, inlined)
+ if session.undoable_action('A', rtype):
+ for subject, object in subj_obj_list:
+ self._record_tx_action(session, 'tx_relation_actions', 'A',
+ eid_from=subject, rtype=rtype, eid_to=object)
+
+ def _add_relations(self, session, rtype, subj_obj_list, inlined=False):
"""add a relation to the source"""
if inlined is False:
- attrs = {'eid_from': subject, 'eid_to': object}
- sql = self.sqlgen.insert('%s_relation' % rtype, attrs)
+ attrs = [{'eid_from': subject, 'eid_to': object}
+ for subject, object in subj_obj_list]
+ sql = self.sqlgen.insert('%s_relation' % rtype, attrs[0])
else: # used by data import
etype = session.describe(subject)[0]
- attrs = {'cw_eid': subject, SQL_PREFIX + rtype: object}
- sql = self.sqlgen.update(SQL_PREFIX + etype, attrs,
+ attrs = [{'cw_eid': subject, SQL_PREFIX + rtype: object}
+ for subject, object in subj_obj_list]
+ sql = self.sqlgen.update(SQL_PREFIX + etype, attrs[0],
['cw_eid'])
- self.doexec(session, sql, attrs)
+ self.doexecmany(session, sql, attrs)
def delete_relation(self, session, subject, rtype, object):
"""delete a relation from the source"""
--- a/server/ssplanner.py Fri Apr 15 16:05:20 2011 +0200
+++ b/server/ssplanner.py Fri Apr 15 15:42:17 2011 +0200
@@ -559,6 +559,7 @@
session = self.plan.session
repo = session.repo
edefs = {}
+ relations = {}
# insert relations
if self.children:
result = self.execute_child()
@@ -578,9 +579,14 @@
edefs[eid] = edited = EditedEntity(edef)
edited.edited_attribute(str(rschema), rhsval)
else:
- repo.glob_add_relation(session, lhsval, str(rschema), rhsval)
+ str_rschema = str(rschema)
+ if str_rschema in relations:
+ relations[str_rschema].append((lhsval, rhsval))
+ else:
+ relations[str_rschema] = [(lhsval, rhsval)]
result[i] = newrow
# update entities
+ repo.glob_add_relations(session, relations)
for eid, edited in edefs.iteritems():
repo.glob_update_entity(session, edited)
return result
--- a/server/test/unittest_repository.py Fri Apr 15 16:05:20 2011 +0200
+++ b/server/test/unittest_repository.py Fri Apr 15 15:42:17 2011 +0200
@@ -757,5 +757,73 @@
t3 = time.time()
self.info('commit creation: %.2gs', (t3 - t2))
+
+ def test_session_add_relation(self):
+ """ to be compared with test_session_add_relations"""
+ req = self.request()
+ personnes = []
+ for i in xrange(2000):
+ p = req.create_entity('Personne', nom=u'Doe%03d'%i, prenom=u'John', sexe=u'M')
+ personnes.append(p)
+ abraham = req.create_entity('Personne', nom=u'Abraham', prenom=u'John', sexe=u'M')
+ req.cnx.commit()
+ t0 = time.time()
+ add_relation = self.session.add_relation
+ for p in personnes:
+ add_relation(abraham.eid, 'personne_composite', p.eid)
+ req.cnx.commit()
+ t1 = time.time()
+ self.info('add relation: %.2gs', t1-t0)
+
+ def test_session_add_relations (self):
+ """ to be compared with test_session_add_relation"""
+ req = self.request()
+ personnes = []
+ for i in xrange(2000):
+ p = req.create_entity('Personne', nom=u'Doe%03d'%i, prenom=u'John', sexe=u'M')
+ personnes.append(p)
+ abraham = req.create_entity('Personne', nom=u'Abraham', prenom=u'John', sexe=u'M')
+ req.cnx.commit()
+ t0 = time.time()
+ add_relations = self.session.add_relations
+ relations = [('personne_composite', [(abraham.eid, p.eid) for p in personnes])]
+ add_relations(relations)
+ req.cnx.commit()
+ t1 = time.time()
+ self.info('add relations: %.2gs', t1-t0)
+ def test_session_add_relation_inlined(self):
+ """ to be compared with test_session_add_relations"""
+ req = self.request()
+ personnes = []
+ for i in xrange(2000):
+ p = req.create_entity('Personne', nom=u'Doe%03d'%i, prenom=u'John', sexe=u'M')
+ personnes.append(p)
+ abraham = req.create_entity('Personne', nom=u'Abraham', prenom=u'John', sexe=u'M')
+ req.cnx.commit()
+ t0 = time.time()
+ add_relation = self.session.add_relation
+ for p in personnes:
+ add_relation(abraham.eid, 'personne_inlined', p.eid)
+ req.cnx.commit()
+ t1 = time.time()
+ self.info('add relation (inlined): %.2gs', t1-t0)
+
+ def test_session_add_relations_inlined (self):
+ """ to be compared with test_session_add_relation"""
+ req = self.request()
+ personnes = []
+ for i in xrange(2000):
+ p = req.create_entity('Personne', nom=u'Doe%03d'%i, prenom=u'John', sexe=u'M')
+ personnes.append(p)
+ abraham = req.create_entity('Personne', nom=u'Abraham', prenom=u'John', sexe=u'M')
+ req.cnx.commit()
+ t0 = time.time()
+ add_relations = self.session.add_relations
+ relations = [('personne_inlined', [(abraham.eid, p.eid) for p in personnes])]
+ add_relations(relations)
+ req.cnx.commit()
+ t1 = time.time()
+ self.info('add relations (inlined): %.2gs', t1-t0)
+
if __name__ == '__main__':
unittest_main()