[server] improve the speed of setting relations between entities (closes #1625257)
authorAlexandre Fayolle <alexandre.fayolle@logilab.fr>
Fri, 15 Apr 2011 15:42:17 +0200
changeset 7237 9f619715665b
parent 7236 b91205ada414
child 7238 576abb8c4626
[server] improve the speed of setting relations between entities (closes #1625257) The main idea is to add methods equivalent to session.add_relation and repository.glob_add_relation which handle several relations in one call. Speed gain results from: * using cursor.executemany to run SQL statements * factorizing some code which otherwise has to be performed for each relation, such as context manager creation before calling hooks or to enable security, creation of the EditedEntity instance (when several inlined relations are set on a single entity, and consequently when refreshing the cached entity) benchmark runs 1.1x faster for non inlined entities and 125x faster for inlined entities :-)
hooks/metadata.py
server/hook.py
server/querier.py
server/repository.py
server/session.py
server/sources/__init__.py
server/sources/native.py
server/ssplanner.py
server/test/unittest_repository.py
--- a/hooks/metadata.py	Fri Apr 15 16:05:20 2011 +0200
+++ b/hooks/metadata.py	Fri Apr 15 15:42:17 2011 +0200
@@ -67,13 +67,12 @@
 
     def precommit_event(self):
         session = self.session
-        for eid in self.get_data():
-            if session.deleted_in_transaction(eid):
-                # entity have been created and deleted in the same transaction
-                continue
-            entity = session.entity_from_eid(eid)
-            if not entity.created_by:
-                session.add_relation(eid, 'created_by', session.user.eid)
+        relations = [(eid, session.user.eid) for eid in self.get_data()
+                # don't consider entities that have been created and
+                # deleted in the same transaction
+                if not session.deleted_in_transaction(eid) and \
+                   not session.entity_from_eid(eid).created_by]
+        session.add_relations([('created_by', relations)])
 
 
 class SetOwnershipHook(MetaDataHook):
--- a/server/hook.py	Fri Apr 15 16:05:20 2011 +0200
+++ b/server/hook.py	Fri Apr 15 15:42:17 2011 +0200
@@ -270,13 +270,17 @@
                     'session_open', 'session_close'))
 ALL_HOOKS = ENTITIES_HOOKS | RELATIONS_HOOKS | SYSTEM_HOOKS
 
-def _iter_kwargs(entities, kwargs):
-    if not entities:
+def _iter_kwargs(entities, eids_from_to, kwargs):
+    if not entities and not eids_from_to:
         yield kwargs
-    else:
+    elif entities:
         for entity in entities:
             kwargs['entity'] = entity
             yield kwargs
+    else:
+        for subject, object in eids_from_to:
+            kwargs.update({'eidfrom': subject, 'eidto': object})
+            yield kwargs
 
 
 class HooksRegistry(CWRegistry):
@@ -304,12 +308,19 @@
             if 'entities' in kwargs:
                 assert 'entity' not in kwargs, \
                        'can\'t pass "entities" and "entity" arguments simultaneously'
+                assert 'eids_from_to' not in kwargs, \
+                       'can\'t pass "entities" and "eids_from_to" arguments simultaneously'
                 entities = kwargs.pop('entities')
+                eids_from_to = []
+            elif 'eids_from_to' in kwargs:
+                entities = []
+                eids_from_to = kwargs.pop('eids_from_to')
             else:
                 entities = []
+                eids_from_to = []
             # by default, hooks are executed with security turned off
             with security_enabled(session, read=False):
-                for _kwargs in _iter_kwargs(entities, kwargs):
+                for _kwargs in _iter_kwargs(entities, eids_from_to, kwargs):
                     hooks = sorted(self.possible_objects(session, **_kwargs),
                                    key=lambda x: x.order)
                     with security_enabled(session, write=False):
--- a/server/querier.py	Fri Apr 15 16:05:20 2011 +0200
+++ b/server/querier.py	Fri Apr 15 15:42:17 2011 +0200
@@ -556,6 +556,8 @@
     def insert_relation_defs(self):
         session = self.session
         repo = session.repo
+        edited_entities = {}
+        relations = {}
         for subj, rtype, obj in self.relation_defs():
             # if a string is given into args instead of an int, we get it here
             if isinstance(subj, basestring):
@@ -567,12 +569,21 @@
             elif not isinstance(obj, (int, long)):
                 obj = obj.entity.eid
             if repo.schema.rschema(rtype).inlined:
-                entity = session.entity_from_eid(subj)
-                edited = EditedEntity(entity)
+                if subj not in edited_entities:
+                    entity = session.entity_from_eid(subj)
+                    edited = EditedEntity(entity)
+                    edited_entities[subj] = edited
+                else:
+                    edited = edited_entities[subj]
                 edited.edited_attribute(rtype, obj)
-                repo.glob_update_entity(session, edited)
             else:
-                repo.glob_add_relation(session, subj, rtype, obj)
+                if rtype in relations:
+                    relations[rtype].append((subj, obj))
+                else:
+                    relations[rtype] = [(subj, obj)]
+        repo.glob_add_relations(session, relations)
+        for edited in edited_entities.itervalues():
+            repo.glob_update_entity(session, edited)
 
 
 class QuerierHelper(object):
--- a/server/repository.py	Fri Apr 15 16:05:20 2011 +0200
+++ b/server/repository.py	Fri Apr 15 15:42:17 2011 +0200
@@ -1375,19 +1375,37 @@
 
     def glob_add_relation(self, session, subject, rtype, object):
         """add a relation to the repository"""
-        if server.DEBUG & server.DBG_REPO:
-            print 'ADD relation', subject, rtype, object
-        source = self.locate_relation_source(session, subject, rtype, object)
-        if source.should_call_hooks:
-            del_existing_rel_if_needed(session, subject, rtype, object)
-            self.hm.call_hooks('before_add_relation', session,
-                               eidfrom=subject, rtype=rtype, eidto=object)
-        source.add_relation(session, subject, rtype, object)
-        rschema = self.schema.rschema(rtype)
-        session.update_rel_cache_add(subject, rtype, object, rschema.symmetric)
-        if source.should_call_hooks:
-            self.hm.call_hooks('after_add_relation', session,
-                               eidfrom=subject, rtype=rtype, eidto=object)
+        self.glob_add_relations(session, {rtype: [(subject, object)]})
+
+    def glob_add_relations(self, session, relations):
+        """add several relations to the repository
+
+        relations is a dictionary rtype: [(subj_eid, obj_eid), ...]
+        """
+        sources = {}
+        for rtype, eids_subj_obj in relations.iteritems():
+            if server.DEBUG & server.DBG_REPO:
+                for subject, object in relations:
+                    print 'ADD relation', subject, rtype, object
+            for subject, object in eids_subj_obj:
+                source = self.locate_relation_source(session, subject, rtype, object)
+                if source in sources:
+                    sources[source][1].append((subject, object))
+                else:
+                    sources[source] = rtype, [(subject, object)]
+        for source, (rtype, source_relations) in sources.iteritems():
+            if source.should_call_hooks:
+                for subject, object in source_relations:
+                    del_existing_rel_if_needed(session, subject, rtype, object)
+                self.hm.call_hooks('before_add_relation', session,
+                                    rtype=rtype, eids_from_to=source_relations)
+            source.add_relations(session, rtype, source_relations)
+            rschema = self.schema.rschema(rtype)
+            for subject, object in source_relations:
+                session.update_rel_cache_add(subject, rtype, object, rschema.symmetric)
+            if source.should_call_hooks:
+                self.hm.call_hooks('after_add_relation', session,
+                                   rtype=rtype, eids_from_to=source_relations)
 
     def glob_delete_relation(self, session, subject, rtype, object):
         """delete a relation from the repository"""
--- a/server/session.py	Fri Apr 15 16:05:20 2011 +0200
+++ b/server/session.py	Fri Apr 15 15:42:17 2011 +0200
@@ -213,14 +213,34 @@
         You may use this in hooks when you know both eids of the relation you
         want to add.
         """
+        self.add_relations([(rtype, [(fromeid,  toeid)])])
+
+    def add_relations(self, relations):
+        '''set many relation using a shortcut similar to the one in add_relation
+        
+        relations is a list of 2-uples, the first element of each
+        2-uple is the rtype, and the second is a list of (fromeid,
+        toeid) tuples
+        '''
+        edited_entities = {}
+        relations_dict = {}
         with security_enabled(self, False, False):
-            if self.vreg.schema[rtype].inlined:
-                entity = self.entity_from_eid(fromeid)
-                edited = EditedEntity(entity)
-                edited.edited_attribute(rtype, toeid)
+            for rtype, eids in relations:
+                if self.vreg.schema[rtype].inlined:
+                    for fromeid, toeid in eids:
+                        if fromeid not in edited_entities:
+                            entity = self.entity_from_eid(fromeid)
+                            edited = EditedEntity(entity)
+                            edited_entities[fromeid] = edited
+                        else:
+                            edited = edited_entities[fromeid]
+                        edited.edited_attribute(rtype, toeid)
+                else:
+                    relations_dict[rtype] = eids
+            self.repo.glob_add_relations(self, relations_dict)
+            for edited in edited_entities.itervalues():
                 self.repo.glob_update_entity(self, edited)
-            else:
-                self.repo.glob_add_relation(self, fromeid, rtype, toeid)
+
 
     def delete_relation(self, fromeid, rtype, toeid):
         """provide direct access to the repository method to delete a relation.
--- a/server/sources/__init__.py	Fri Apr 15 16:05:20 2011 +0200
+++ b/server/sources/__init__.py	Fri Apr 15 15:42:17 2011 +0200
@@ -434,6 +434,13 @@
         """add a relation to the source"""
         raise NotImplementedError()
 
+    def add_relations(self, session,  rtype, subj_obj_list):
+        """add a relations to the source"""
+        # override in derived classes if you feel you can
+        # optimize
+        for subject, object in subj_obj_list:
+            self.add_relation(session, subject, rtype, object)
+
     def delete_relation(self, session, subject, rtype, object):
         """delete a relation from the source"""
         raise NotImplementedError()
--- a/server/sources/native.py	Fri Apr 15 16:05:20 2011 +0200
+++ b/server/sources/native.py	Fri Apr 15 15:42:17 2011 +0200
@@ -628,22 +628,32 @@
 
     def add_relation(self, session, subject, rtype, object, inlined=False):
         """add a relation to the source"""
-        self._add_relation(session, subject, rtype, object, inlined)
+        self._add_relations(session,  rtype, [(subject, object)], inlined)
         if session.undoable_action('A', rtype):
             self._record_tx_action(session, 'tx_relation_actions', 'A',
                                    eid_from=subject, rtype=rtype, eid_to=object)
 
-    def _add_relation(self, session, subject, rtype, object, inlined=False):
+    def add_relations(self, session,  rtype, subj_obj_list, inlined=False):
+        """add a relations to the source"""
+        self._add_relations(session, rtype, subj_obj_list, inlined)
+        if session.undoable_action('A', rtype):
+            for subject, object in subj_obj_list:
+                self._record_tx_action(session, 'tx_relation_actions', 'A',
+                                       eid_from=subject, rtype=rtype, eid_to=object)
+                
+    def _add_relations(self, session, rtype, subj_obj_list, inlined=False):
         """add a relation to the source"""
         if inlined is False:
-            attrs = {'eid_from': subject, 'eid_to': object}
-            sql = self.sqlgen.insert('%s_relation' % rtype, attrs)
+            attrs = [{'eid_from': subject, 'eid_to': object}
+                     for subject, object in subj_obj_list]
+            sql = self.sqlgen.insert('%s_relation' % rtype, attrs[0])
         else: # used by data import
             etype = session.describe(subject)[0]
-            attrs = {'cw_eid': subject, SQL_PREFIX + rtype: object}
-            sql = self.sqlgen.update(SQL_PREFIX + etype, attrs,
+            attrs = [{'cw_eid': subject, SQL_PREFIX + rtype: object}
+                     for subject, object in subj_obj_list]
+            sql = self.sqlgen.update(SQL_PREFIX + etype, attrs[0],
                                      ['cw_eid'])
-        self.doexec(session, sql, attrs)
+        self.doexecmany(session, sql, attrs)
 
     def delete_relation(self, session, subject, rtype, object):
         """delete a relation from the source"""
--- a/server/ssplanner.py	Fri Apr 15 16:05:20 2011 +0200
+++ b/server/ssplanner.py	Fri Apr 15 15:42:17 2011 +0200
@@ -559,6 +559,7 @@
         session = self.plan.session
         repo = session.repo
         edefs = {}
+        relations = {}
         # insert relations
         if self.children:
             result = self.execute_child()
@@ -578,9 +579,14 @@
                         edefs[eid] = edited = EditedEntity(edef)
                     edited.edited_attribute(str(rschema), rhsval)
                 else:
-                    repo.glob_add_relation(session, lhsval, str(rschema), rhsval)
+                    str_rschema = str(rschema)
+                    if str_rschema in relations:
+                        relations[str_rschema].append((lhsval, rhsval))
+                    else:
+                        relations[str_rschema] = [(lhsval, rhsval)]
             result[i] = newrow
         # update entities
+        repo.glob_add_relations(session, relations)
         for eid, edited in edefs.iteritems():
             repo.glob_update_entity(session, edited)
         return result
--- a/server/test/unittest_repository.py	Fri Apr 15 16:05:20 2011 +0200
+++ b/server/test/unittest_repository.py	Fri Apr 15 15:42:17 2011 +0200
@@ -757,5 +757,73 @@
         t3 = time.time()
         self.info('commit creation: %.2gs', (t3 - t2))
 
+
+    def test_session_add_relation(self):
+        """ to be compared with test_session_add_relations"""
+        req = self.request()
+        personnes = []
+        for i in xrange(2000):
+            p = req.create_entity('Personne', nom=u'Doe%03d'%i, prenom=u'John', sexe=u'M')
+            personnes.append(p)
+        abraham = req.create_entity('Personne', nom=u'Abraham', prenom=u'John', sexe=u'M')
+        req.cnx.commit()
+        t0 = time.time()
+        add_relation = self.session.add_relation
+        for p in personnes:
+            add_relation(abraham.eid, 'personne_composite', p.eid)
+        req.cnx.commit()
+        t1 = time.time()
+        self.info('add relation: %.2gs', t1-t0)
+
+    def test_session_add_relations (self):
+        """ to be compared with test_session_add_relation"""
+        req = self.request()
+        personnes = []
+        for i in xrange(2000):
+            p = req.create_entity('Personne', nom=u'Doe%03d'%i, prenom=u'John', sexe=u'M')
+            personnes.append(p)
+        abraham = req.create_entity('Personne', nom=u'Abraham', prenom=u'John', sexe=u'M')
+        req.cnx.commit()
+        t0 = time.time()
+        add_relations = self.session.add_relations
+        relations = [('personne_composite', [(abraham.eid, p.eid) for p in personnes])]
+        add_relations(relations)
+        req.cnx.commit()
+        t1 = time.time()
+        self.info('add relations: %.2gs', t1-t0)
+    def test_session_add_relation_inlined(self):
+        """ to be compared with test_session_add_relations"""
+        req = self.request()
+        personnes = []
+        for i in xrange(2000):
+            p = req.create_entity('Personne', nom=u'Doe%03d'%i, prenom=u'John', sexe=u'M')
+            personnes.append(p)
+        abraham = req.create_entity('Personne', nom=u'Abraham', prenom=u'John', sexe=u'M')
+        req.cnx.commit()
+        t0 = time.time()
+        add_relation = self.session.add_relation
+        for p in personnes:
+            add_relation(abraham.eid, 'personne_inlined', p.eid)
+        req.cnx.commit()
+        t1 = time.time()
+        self.info('add relation (inlined): %.2gs', t1-t0)
+
+    def test_session_add_relations_inlined (self):
+        """ to be compared with test_session_add_relation"""
+        req = self.request()
+        personnes = []
+        for i in xrange(2000):
+            p = req.create_entity('Personne', nom=u'Doe%03d'%i, prenom=u'John', sexe=u'M')
+            personnes.append(p)
+        abraham = req.create_entity('Personne', nom=u'Abraham', prenom=u'John', sexe=u'M')
+        req.cnx.commit()
+        t0 = time.time()
+        add_relations = self.session.add_relations
+        relations = [('personne_inlined', [(abraham.eid, p.eid) for p in personnes])]
+        add_relations(relations)
+        req.cnx.commit()
+        t1 = time.time()
+        self.info('add relations (inlined): %.2gs', t1-t0)
+
 if __name__ == '__main__':
     unittest_main()