enable server side entity caching, 25% speedup on codenaf insertion. ALL CW TESTS OK
authorSylvain Thénault <sylvain.thenault@logilab.fr>
Sun, 02 Aug 2009 12:00:17 +0200
changeset 2647 b0a2e779845c
parent 2646 d2874ddd4347
child 2648 4ae7d02ce063
child 2651 3ad936634d2a
enable server side entity caching, 25% speedup on codenaf insertion. ALL CW TESTS OK
__init__.py
entity.py
rset.py
server/hookhelper.py
server/repository.py
server/securityhooks.py
server/session.py
--- a/__init__.py	Sun Aug 02 11:58:55 2009 +0200
+++ b/__init__.py	Sun Aug 02 12:00:17 2009 +0200
@@ -100,13 +100,27 @@
                          [(etype,)])
         return self.decorate_rset(rset)
 
+    def empty_rset(self):
+        """return a result set for the given eid without doing actual query
+        (we have the eid, we can suppose it exists and user has access to the
+        entity)
+        """
+        from cubicweb.rset import ResultSet
+        return self.decorate_rset(ResultSet([], 'Any X WHERE X eid -1'))
+
     def entity_from_eid(self, eid, etype=None):
-        rset = self.eid_rset(eid, etype)
-        if rset:
-            return rset.get_entity(0, 0)
-        else:
-            return None
+        try:
+            return self.entity_cache(eid)
+        except KeyError:
+            rset = self.eid_rset(eid, etype)
+            entity = rset.get_entity(0, 0)
+            self.set_entity_cache(entity)
+            return entity
 
+    def entity_cache(self, eid):
+        raise KeyError
+    def set_entity_cache(self, entity):
+        pass
     # url generation methods ##################################################
 
     def build_url(self, *args, **kwargs):
--- a/entity.py	Sun Aug 02 11:58:55 2009 +0200
+++ b/entity.py	Sun Aug 02 12:00:17 2009 +0200
@@ -647,7 +647,8 @@
             if not self.is_saved():
                 return None
             rql = "Any A WHERE X eid %%(x)s, X %s A" % name
-            # XXX should we really use unsafe_execute here??
+            # XXX should we really use unsafe_execute here? I think so (syt),
+            # see #344874
             execute = getattr(self.req, 'unsafe_execute', self.req.execute)
             try:
                 rset = execute(rql, {'x': self.eid}, 'x')
@@ -681,7 +682,10 @@
             pass
         assert self.has_eid()
         rql = self.related_rql(rtype, role)
-        rset = self.req.execute(rql, {'x': self.eid}, 'x')
+        # XXX should we really use unsafe_execute here? I think so (syt),
+        # see #344874
+        execute = getattr(self.req, 'unsafe_execute', self.req.execute)
+        rset = execute(rql, {'x': self.eid}, 'x')
         self.set_related_cache(rtype, role, rset)
         return self.related(rtype, role, limit, entities)
 
@@ -787,7 +791,7 @@
     def relation_cached(self, rtype, role):
         """return true if the given relation is already cached on the instance
         """
-        return '%s_%s' % (rtype, role) in self._related_cache
+        return self._related_cache.get('%s_%s' % (rtype, role))
 
     def related_cache(self, rtype, role, entities=True, limit=None):
         """return values for the given relation if it's cached on the instance,
--- a/rset.py	Sun Aug 02 11:58:55 2009 +0200
+++ b/rset.py	Sun Aug 02 12:00:17 2009 +0200
@@ -347,7 +347,7 @@
             raise NotAnEntity(etype)
         return self._build_entity(row, col)
 
-    def _build_entity(self, row, col, _localcache=None):
+    def _build_entity(self, row, col):
         """internal method to get a single entity, returns a
         partially initialized Entity instance.
 
@@ -370,14 +370,7 @@
         # XXX should we consider updating a cached entity with possible
         #     new attributes found in this resultset ?
         try:
-            if hasattr(req, 'is_super_session'):
-                # this is a Session object which is not caching entities, so we
-                # have to use a local cache to avoid recursion pb
-                if _localcache is None:
-                    _localcache = {}
-                return _localcache[eid]
-            else:
-                return req.entity_cache(eid)
+            return req.entity_cache(eid)
         except KeyError:
             pass
         # build entity instance
@@ -385,8 +378,6 @@
         entity = self.vreg.etype_class(etype)(req, self, row, col)
         entity.set_eid(eid)
         # cache entity
-        if _localcache is not None:
-            _localcache[eid] = entity
         req.set_entity_cache(entity)
         eschema = entity.e_schema
         # try to complete the entity if there are some additional columns
@@ -420,7 +411,7 @@
                         rrset = ResultSet([], rql % (attr, entity.eid))
                         req.decorate_rset(rrset)
                     else:
-                        rrset = self._build_entity(row, i, _localcache).as_rset()
+                        rrset = self._build_entity(row, i).as_rset()
                     entity.set_related_cache(attr, x, rrset)
         return entity
 
--- a/server/hookhelper.py	Sun Aug 02 11:58:55 2009 +0200
+++ b/server/hookhelper.py	Sun Aug 02 12:00:17 2009 +0200
@@ -17,9 +17,7 @@
 
 def entity_attr(session, eid, attr):
     """return an arbitrary attribute of the entity with the given eid"""
-    rset = session.execute('Any N WHERE X eid %%(x)s, X %s N' % attr,
-                           {'x': eid}, 'x')
-    return rset[0][0]
+    return getattr(session.entity_from_eid(eid), attr)
 
 def rproperty(session, rtype, eidfrom, eidto, rprop):
     rschema = session.repo.schema[rtype]
--- a/server/repository.py	Sun Aug 02 11:58:55 2009 +0200
+++ b/server/repository.py	Sun Aug 02 12:00:17 2009 +0200
@@ -997,11 +997,10 @@
         for attr in entity.keys():
             rschema = eschema.subject_relation(attr)
             if not rschema.is_final(): # inlined relation
-                entity.set_related_cache(attr, 'subject',
-                                         entity.req.eid_rset(entity[attr]))
                 relations.append((attr, entity[attr]))
         if source.should_call_hooks:
             self.hm.call_hooks('before_add_entity', etype, session, entity)
+        entity.edited_attributes = entity.keys()
         entity.set_defaults()
         entity.check(creation=True)
         source.add_entity(session, entity)
@@ -1012,6 +1011,21 @@
             extid = None
         self.add_info(session, entity, source, extid, complete=False)
         entity._is_saved = True # entity has an eid and is saved
+        # prefill entity relation caches
+        session.set_entity_cache(entity)
+        for rschema in eschema.subject_relations():
+            rtype = str(rschema)
+            if rtype in VIRTUAL_RTYPES:
+                continue
+            if rschema.is_final():
+                entity.setdefault(rtype, None)
+            else:
+                entity.set_related_cache(rtype, 'subject', session.empty_rset())
+        for rschema in eschema.object_relations():
+            rtype = str(rschema)
+            if rtype in VIRTUAL_RTYPES:
+                continue
+            entity.set_related_cache(rtype, 'object', session.empty_rset())
         # trigger after_add_entity after after_add_relation
         if source.should_call_hooks:
             self.hm.call_hooks('after_add_entity', etype, session, entity)
@@ -1019,6 +1033,7 @@
             for attr, value in relations:
                 self.hm.call_hooks('before_add_relation', attr, session,
                                     entity.eid, attr, value)
+                session.update_rel_cache_add(entity.eid, attr, value)
                 self.hm.call_hooks('after_add_relation', attr, session,
                                     entity.eid, attr, value)
         return entity.eid
@@ -1032,6 +1047,7 @@
             print 'UPDATE entity', etype, entity.eid, dict(entity)
         entity.check()
         eschema = entity.e_schema
+        session.set_entity_cache(entity)
         only_inline_rels, need_fti_update = True, False
         relations = []
         for attr in entity.keys():
@@ -1047,10 +1063,12 @@
                 previous_value = entity.related(attr)
                 if previous_value:
                     previous_value = previous_value[0][0] # got a result set
-                    self.hm.call_hooks('before_delete_relation', attr, session,
-                                       entity.eid, attr, previous_value)
-                entity.set_related_cache(attr, 'subject',
-                                         entity.req.eid_rset(entity[attr]))
+                    if previous_value == entity[attr]:
+                        previous_value = None
+                    else:
+                        self.hm.call_hooks('before_delete_relation', attr,
+                                           session, entity.eid, attr,
+                                           previous_value)
                 relations.append((attr, entity[attr], previous_value))
         source = self.source_from_eid(entity.eid, session)
         if source.should_call_hooks:
@@ -1072,10 +1090,19 @@
                                     entity)
         if source.should_call_hooks:
             for attr, value, prevvalue in relations:
+                # if the relation is already cached, update existant cache
+                relcache = entity.relation_cached(attr, 'subject')
                 if prevvalue:
                     self.hm.call_hooks('after_delete_relation', attr, session,
                                        entity.eid, attr, prevvalue)
+                    if relcache is not None:
+                        session.update_rel_cache_del(entity.eid, attr, prevvalue)
                 del_existing_rel_if_needed(session, entity.eid, attr, value)
+                if relcache is not None:
+                    session.update_rel_cache_add(entity.eid, attr, value)
+                else:
+                    entity.set_related_cache(attr, 'subject',
+                                             session.eid_rset(value))
                 self.hm.call_hooks('after_add_relation', attr, session,
                                     entity.eid, attr, value)
 
@@ -1086,6 +1113,8 @@
         etype, uri, extid = self.type_and_source_from_eid(eid, session)
         if server.DEBUG & server.DBG_REPO:
             print 'DELETE entity', etype, eid
+            if eid == 937:
+                server.DEBUG |= (server.DBG_SQL | server.DBG_RQL | server.DBG_MORE)
         source = self.sources_by_uri[uri]
         if source.should_call_hooks:
             self.hm.call_hooks('before_delete_entity', etype, session, eid)
@@ -1105,6 +1134,8 @@
             self.hm.call_hooks('before_add_relation', rtype, session,
                                subject, rtype, object)
         source.add_relation(session, subject, rtype, object)
+        rschema = self.schema.rschema(rtype)
+        session.update_rel_cache_add(subject, rtype, object, rschema.symetric)
         if source.should_call_hooks:
             self.hm.call_hooks('after_add_relation', rtype, session,
                                subject, rtype, object)
@@ -1118,7 +1149,9 @@
             self.hm.call_hooks('before_delete_relation', rtype, session,
                                subject, rtype, object)
         source.delete_relation(session, subject, rtype, object)
-        if self.schema.rschema(rtype).symetric:
+        rschema = self.schema.rschema(rtype)
+        session.update_rel_cache_del(subject, rtype, object, rschema.symetric)
+        if rschema.symetric:
             # on symetric relation, we can't now in which sense it's
             # stored so try to delete both
             source.delete_relation(session, object, rtype, subject)
--- a/server/securityhooks.py	Sun Aug 02 11:58:55 2009 +0200
+++ b/server/securityhooks.py	Sun Aug 02 12:00:17 2009 +0200
@@ -18,7 +18,11 @@
     # ._default_set is only there on entity creation to indicate unspecified
     # attributes which has been set to a default value defined in the schema
     defaults = getattr(entity, '_default_set', ())
-    for attr in entity.keys():
+    try:
+        editedattrs = entity.edited_attributes
+    except AttributeError:
+        editedattrs = entity.keys()
+    for attr in editedattrs:
         if attr in defaults:
             continue
         rschema = eschema.subject_relation(attr)
--- a/server/session.py	Sun Aug 02 11:58:55 2009 +0200
+++ b/server/session.py	Sun Aug 02 12:00:17 2009 +0200
@@ -88,6 +88,54 @@
         finally:
             self.is_super_session = False
 
+    def update_rel_cache_add(self, subject, rtype, object, symetric=False):
+        self._update_entity_rel_cache_add(subject, rtype, 'subject', object)
+        if symetric:
+            self._update_entity_rel_cache_add(object, rtype, 'subject', subject)
+        else:
+            self._update_entity_rel_cache_add(object, rtype, 'object', subject)
+
+    def update_rel_cache_del(self, subject, rtype, object, symetric=False):
+        self._update_entity_rel_cache_del(subject, rtype, 'subject', object)
+        if symetric:
+            self._update_entity_rel_cache_del(object, rtype, 'object', object)
+        else:
+            self._update_entity_rel_cache_del(object, rtype, 'object', subject)
+
+    def _rel_cache(self, eid, rtype, role):
+        try:
+            entity = self.entity_cache(eid)
+        except KeyError:
+            return
+        return entity.relation_cached(rtype, role)
+
+    def _update_entity_rel_cache_add(self, eid, rtype, role, targeteid):
+        rcache = self._rel_cache(eid, rtype, role)
+        if rcache is not None:
+            rset, entities = rcache
+            rset.rows.append([targeteid])
+            if isinstance(rset.description, list): # else description not set
+                rset.description.append([self.describe(targeteid)[0]])
+            rset.rowcount += 1
+            targetentity = self.entity_from_eid(targeteid)
+            entities.append(targetentity)
+
+    def _update_entity_rel_cache_del(self, eid, rtype, role, targeteid):
+        rcache = self._rel_cache(eid, rtype, role)
+        if rcache is not None:
+            rset, entities = rcache
+            for idx, row in enumerate(rset.rows):
+                if row[0] == targeteid:
+                    break
+            else:
+                raise Exception('cache inconsistency for %s %s %s %s' %
+                                (eid, rtype, role, targeteid))
+            del rset.rows[idx]
+            if isinstance(rset.description, list): # else description not set
+                del rset.description[idx]
+            del entities[idx]
+            rset.rowcount -= 1
+
     # resource accessors ######################################################
 
     def actual_session(self):
@@ -221,12 +269,30 @@
     # request interface #######################################################
 
     def set_entity_cache(self, entity):
-        # no entity cache in the server, too high risk of inconsistency
-        # between pre/post hooks
-        pass
+        # XXX session level caching may be a pb with multiple repository
+        #     instances, but 1. this is probably not the only one :$ and 2. it
+        #     may be an acceptable risk. Anyway we could activate it or not
+        #     according to a configuration option
+        try:
+            self.transaction_data['ecache'].setdefault(entity.eid, entity)
+        except KeyError:
+            self.transaction_data['ecache'] = ecache = {}
+            ecache[entity.eid] = entity
 
     def entity_cache(self, eid):
-        raise KeyError(eid)
+        try:
+            return self.transaction_data['ecache'][eid]
+        except:
+            raise
+
+    def cached_entities(self):
+        return self.transaction_data.get('ecache', {}).values()
+
+    def drop_entity_cache(self, eid=None):
+        if eid is None:
+            self.transaction_data.pop('ecache', None)
+        else:
+            del self.transaction_data['ecache'][eid]
 
     def base_url(self):
         url = self.repo.config['base-url']
@@ -276,7 +342,7 @@
         self.set_pool()
         return csession
 
-    def unsafe_execute(self, rql, kwargs=None, eid_key=None, build_descr=False,
+    def unsafe_execute(self, rql, kwargs=None, eid_key=None, build_descr=True,
                        propagate=False):
         """like .execute but with security checking disabled (this method is
         internal to the server, it's not part of the db-api)