enable server side entity caching, 25% speedup on codenaf insertion. ALL CW TESTS OK
--- a/__init__.py Sun Aug 02 11:58:55 2009 +0200
+++ b/__init__.py Sun Aug 02 12:00:17 2009 +0200
@@ -100,13 +100,27 @@
[(etype,)])
return self.decorate_rset(rset)
+ def empty_rset(self):
+ """return a result set for the given eid without doing actual query
+ (we have the eid, we can suppose it exists and user has access to the
+ entity)
+ """
+ from cubicweb.rset import ResultSet
+ return self.decorate_rset(ResultSet([], 'Any X WHERE X eid -1'))
+
def entity_from_eid(self, eid, etype=None):
- rset = self.eid_rset(eid, etype)
- if rset:
- return rset.get_entity(0, 0)
- else:
- return None
+ try:
+ return self.entity_cache(eid)
+ except KeyError:
+ rset = self.eid_rset(eid, etype)
+ entity = rset.get_entity(0, 0)
+ self.set_entity_cache(entity)
+ return entity
+ def entity_cache(self, eid):
+ raise KeyError
+ def set_entity_cache(self, entity):
+ pass
# url generation methods ##################################################
def build_url(self, *args, **kwargs):
--- a/entity.py Sun Aug 02 11:58:55 2009 +0200
+++ b/entity.py Sun Aug 02 12:00:17 2009 +0200
@@ -647,7 +647,8 @@
if not self.is_saved():
return None
rql = "Any A WHERE X eid %%(x)s, X %s A" % name
- # XXX should we really use unsafe_execute here??
+ # XXX should we really use unsafe_execute here? I think so (syt),
+ # see #344874
execute = getattr(self.req, 'unsafe_execute', self.req.execute)
try:
rset = execute(rql, {'x': self.eid}, 'x')
@@ -681,7 +682,10 @@
pass
assert self.has_eid()
rql = self.related_rql(rtype, role)
- rset = self.req.execute(rql, {'x': self.eid}, 'x')
+ # XXX should we really use unsafe_execute here? I think so (syt),
+ # see #344874
+ execute = getattr(self.req, 'unsafe_execute', self.req.execute)
+ rset = execute(rql, {'x': self.eid}, 'x')
self.set_related_cache(rtype, role, rset)
return self.related(rtype, role, limit, entities)
@@ -787,7 +791,7 @@
def relation_cached(self, rtype, role):
"""return true if the given relation is already cached on the instance
"""
- return '%s_%s' % (rtype, role) in self._related_cache
+ return self._related_cache.get('%s_%s' % (rtype, role))
def related_cache(self, rtype, role, entities=True, limit=None):
"""return values for the given relation if it's cached on the instance,
--- a/rset.py Sun Aug 02 11:58:55 2009 +0200
+++ b/rset.py Sun Aug 02 12:00:17 2009 +0200
@@ -347,7 +347,7 @@
raise NotAnEntity(etype)
return self._build_entity(row, col)
- def _build_entity(self, row, col, _localcache=None):
+ def _build_entity(self, row, col):
"""internal method to get a single entity, returns a
partially initialized Entity instance.
@@ -370,14 +370,7 @@
# XXX should we consider updating a cached entity with possible
# new attributes found in this resultset ?
try:
- if hasattr(req, 'is_super_session'):
- # this is a Session object which is not caching entities, so we
- # have to use a local cache to avoid recursion pb
- if _localcache is None:
- _localcache = {}
- return _localcache[eid]
- else:
- return req.entity_cache(eid)
+ return req.entity_cache(eid)
except KeyError:
pass
# build entity instance
@@ -385,8 +378,6 @@
entity = self.vreg.etype_class(etype)(req, self, row, col)
entity.set_eid(eid)
# cache entity
- if _localcache is not None:
- _localcache[eid] = entity
req.set_entity_cache(entity)
eschema = entity.e_schema
# try to complete the entity if there are some additional columns
@@ -420,7 +411,7 @@
rrset = ResultSet([], rql % (attr, entity.eid))
req.decorate_rset(rrset)
else:
- rrset = self._build_entity(row, i, _localcache).as_rset()
+ rrset = self._build_entity(row, i).as_rset()
entity.set_related_cache(attr, x, rrset)
return entity
--- a/server/hookhelper.py Sun Aug 02 11:58:55 2009 +0200
+++ b/server/hookhelper.py Sun Aug 02 12:00:17 2009 +0200
@@ -17,9 +17,7 @@
def entity_attr(session, eid, attr):
"""return an arbitrary attribute of the entity with the given eid"""
- rset = session.execute('Any N WHERE X eid %%(x)s, X %s N' % attr,
- {'x': eid}, 'x')
- return rset[0][0]
+ return getattr(session.entity_from_eid(eid), attr)
def rproperty(session, rtype, eidfrom, eidto, rprop):
rschema = session.repo.schema[rtype]
--- a/server/repository.py Sun Aug 02 11:58:55 2009 +0200
+++ b/server/repository.py Sun Aug 02 12:00:17 2009 +0200
@@ -997,11 +997,10 @@
for attr in entity.keys():
rschema = eschema.subject_relation(attr)
if not rschema.is_final(): # inlined relation
- entity.set_related_cache(attr, 'subject',
- entity.req.eid_rset(entity[attr]))
relations.append((attr, entity[attr]))
if source.should_call_hooks:
self.hm.call_hooks('before_add_entity', etype, session, entity)
+ entity.edited_attributes = entity.keys()
entity.set_defaults()
entity.check(creation=True)
source.add_entity(session, entity)
@@ -1012,6 +1011,21 @@
extid = None
self.add_info(session, entity, source, extid, complete=False)
entity._is_saved = True # entity has an eid and is saved
+ # prefill entity relation caches
+ session.set_entity_cache(entity)
+ for rschema in eschema.subject_relations():
+ rtype = str(rschema)
+ if rtype in VIRTUAL_RTYPES:
+ continue
+ if rschema.is_final():
+ entity.setdefault(rtype, None)
+ else:
+ entity.set_related_cache(rtype, 'subject', session.empty_rset())
+ for rschema in eschema.object_relations():
+ rtype = str(rschema)
+ if rtype in VIRTUAL_RTYPES:
+ continue
+ entity.set_related_cache(rtype, 'object', session.empty_rset())
# trigger after_add_entity after after_add_relation
if source.should_call_hooks:
self.hm.call_hooks('after_add_entity', etype, session, entity)
@@ -1019,6 +1033,7 @@
for attr, value in relations:
self.hm.call_hooks('before_add_relation', attr, session,
entity.eid, attr, value)
+ session.update_rel_cache_add(entity.eid, attr, value)
self.hm.call_hooks('after_add_relation', attr, session,
entity.eid, attr, value)
return entity.eid
@@ -1032,6 +1047,7 @@
print 'UPDATE entity', etype, entity.eid, dict(entity)
entity.check()
eschema = entity.e_schema
+ session.set_entity_cache(entity)
only_inline_rels, need_fti_update = True, False
relations = []
for attr in entity.keys():
@@ -1047,10 +1063,12 @@
previous_value = entity.related(attr)
if previous_value:
previous_value = previous_value[0][0] # got a result set
- self.hm.call_hooks('before_delete_relation', attr, session,
- entity.eid, attr, previous_value)
- entity.set_related_cache(attr, 'subject',
- entity.req.eid_rset(entity[attr]))
+ if previous_value == entity[attr]:
+ previous_value = None
+ else:
+ self.hm.call_hooks('before_delete_relation', attr,
+ session, entity.eid, attr,
+ previous_value)
relations.append((attr, entity[attr], previous_value))
source = self.source_from_eid(entity.eid, session)
if source.should_call_hooks:
@@ -1072,10 +1090,19 @@
entity)
if source.should_call_hooks:
for attr, value, prevvalue in relations:
+ # if the relation is already cached, update existant cache
+ relcache = entity.relation_cached(attr, 'subject')
if prevvalue:
self.hm.call_hooks('after_delete_relation', attr, session,
entity.eid, attr, prevvalue)
+ if relcache is not None:
+ session.update_rel_cache_del(entity.eid, attr, prevvalue)
del_existing_rel_if_needed(session, entity.eid, attr, value)
+ if relcache is not None:
+ session.update_rel_cache_add(entity.eid, attr, value)
+ else:
+ entity.set_related_cache(attr, 'subject',
+ session.eid_rset(value))
self.hm.call_hooks('after_add_relation', attr, session,
entity.eid, attr, value)
@@ -1086,6 +1113,8 @@
etype, uri, extid = self.type_and_source_from_eid(eid, session)
if server.DEBUG & server.DBG_REPO:
print 'DELETE entity', etype, eid
+ if eid == 937:
+ server.DEBUG |= (server.DBG_SQL | server.DBG_RQL | server.DBG_MORE)
source = self.sources_by_uri[uri]
if source.should_call_hooks:
self.hm.call_hooks('before_delete_entity', etype, session, eid)
@@ -1105,6 +1134,8 @@
self.hm.call_hooks('before_add_relation', rtype, session,
subject, rtype, object)
source.add_relation(session, subject, rtype, object)
+ rschema = self.schema.rschema(rtype)
+ session.update_rel_cache_add(subject, rtype, object, rschema.symetric)
if source.should_call_hooks:
self.hm.call_hooks('after_add_relation', rtype, session,
subject, rtype, object)
@@ -1118,7 +1149,9 @@
self.hm.call_hooks('before_delete_relation', rtype, session,
subject, rtype, object)
source.delete_relation(session, subject, rtype, object)
- if self.schema.rschema(rtype).symetric:
+ rschema = self.schema.rschema(rtype)
+ session.update_rel_cache_del(subject, rtype, object, rschema.symetric)
+ if rschema.symetric:
# on symetric relation, we can't now in which sense it's
# stored so try to delete both
source.delete_relation(session, object, rtype, subject)
--- a/server/securityhooks.py Sun Aug 02 11:58:55 2009 +0200
+++ b/server/securityhooks.py Sun Aug 02 12:00:17 2009 +0200
@@ -18,7 +18,11 @@
# ._default_set is only there on entity creation to indicate unspecified
# attributes which has been set to a default value defined in the schema
defaults = getattr(entity, '_default_set', ())
- for attr in entity.keys():
+ try:
+ editedattrs = entity.edited_attributes
+ except AttributeError:
+ editedattrs = entity.keys()
+ for attr in editedattrs:
if attr in defaults:
continue
rschema = eschema.subject_relation(attr)
--- a/server/session.py Sun Aug 02 11:58:55 2009 +0200
+++ b/server/session.py Sun Aug 02 12:00:17 2009 +0200
@@ -88,6 +88,54 @@
finally:
self.is_super_session = False
+ def update_rel_cache_add(self, subject, rtype, object, symetric=False):
+ self._update_entity_rel_cache_add(subject, rtype, 'subject', object)
+ if symetric:
+ self._update_entity_rel_cache_add(object, rtype, 'subject', subject)
+ else:
+ self._update_entity_rel_cache_add(object, rtype, 'object', subject)
+
+ def update_rel_cache_del(self, subject, rtype, object, symetric=False):
+ self._update_entity_rel_cache_del(subject, rtype, 'subject', object)
+ if symetric:
+ self._update_entity_rel_cache_del(object, rtype, 'object', object)
+ else:
+ self._update_entity_rel_cache_del(object, rtype, 'object', subject)
+
+ def _rel_cache(self, eid, rtype, role):
+ try:
+ entity = self.entity_cache(eid)
+ except KeyError:
+ return
+ return entity.relation_cached(rtype, role)
+
+ def _update_entity_rel_cache_add(self, eid, rtype, role, targeteid):
+ rcache = self._rel_cache(eid, rtype, role)
+ if rcache is not None:
+ rset, entities = rcache
+ rset.rows.append([targeteid])
+ if isinstance(rset.description, list): # else description not set
+ rset.description.append([self.describe(targeteid)[0]])
+ rset.rowcount += 1
+ targetentity = self.entity_from_eid(targeteid)
+ entities.append(targetentity)
+
+ def _update_entity_rel_cache_del(self, eid, rtype, role, targeteid):
+ rcache = self._rel_cache(eid, rtype, role)
+ if rcache is not None:
+ rset, entities = rcache
+ for idx, row in enumerate(rset.rows):
+ if row[0] == targeteid:
+ break
+ else:
+ raise Exception('cache inconsistency for %s %s %s %s' %
+ (eid, rtype, role, targeteid))
+ del rset.rows[idx]
+ if isinstance(rset.description, list): # else description not set
+ del rset.description[idx]
+ del entities[idx]
+ rset.rowcount -= 1
+
# resource accessors ######################################################
def actual_session(self):
@@ -221,12 +269,30 @@
# request interface #######################################################
def set_entity_cache(self, entity):
- # no entity cache in the server, too high risk of inconsistency
- # between pre/post hooks
- pass
+ # XXX session level caching may be a pb with multiple repository
+ # instances, but 1. this is probably not the only one :$ and 2. it
+ # may be an acceptable risk. Anyway we could activate it or not
+ # according to a configuration option
+ try:
+ self.transaction_data['ecache'].setdefault(entity.eid, entity)
+ except KeyError:
+ self.transaction_data['ecache'] = ecache = {}
+ ecache[entity.eid] = entity
def entity_cache(self, eid):
- raise KeyError(eid)
+ try:
+ return self.transaction_data['ecache'][eid]
+ except:
+ raise
+
+ def cached_entities(self):
+ return self.transaction_data.get('ecache', {}).values()
+
+ def drop_entity_cache(self, eid=None):
+ if eid is None:
+ self.transaction_data.pop('ecache', None)
+ else:
+ del self.transaction_data['ecache'][eid]
def base_url(self):
url = self.repo.config['base-url']
@@ -276,7 +342,7 @@
self.set_pool()
return csession
- def unsafe_execute(self, rql, kwargs=None, eid_key=None, build_descr=False,
+ def unsafe_execute(self, rql, kwargs=None, eid_key=None, build_descr=True,
propagate=False):
"""like .execute but with security checking disabled (this method is
internal to the server, it's not part of the db-api)