# HG changeset patch # User Sylvain Thénault # Date 1249207217 -7200 # Node ID b0a2e779845c275c919ac2968c9caab3c89832b7 # Parent d2874ddd4347592f71988255d17aea1873c3a110 enable server side entity caching, 25% speedup on codenaf insertion. ALL CW TESTS OK diff -r d2874ddd4347 -r b0a2e779845c __init__.py --- a/__init__.py Sun Aug 02 11:58:55 2009 +0200 +++ b/__init__.py Sun Aug 02 12:00:17 2009 +0200 @@ -100,13 +100,27 @@ [(etype,)]) return self.decorate_rset(rset) + def empty_rset(self): + """return a result set for the given eid without doing actual query + (we have the eid, we can suppose it exists and user has access to the + entity) + """ + from cubicweb.rset import ResultSet + return self.decorate_rset(ResultSet([], 'Any X WHERE X eid -1')) + def entity_from_eid(self, eid, etype=None): - rset = self.eid_rset(eid, etype) - if rset: - return rset.get_entity(0, 0) - else: - return None + try: + return self.entity_cache(eid) + except KeyError: + rset = self.eid_rset(eid, etype) + entity = rset.get_entity(0, 0) + self.set_entity_cache(entity) + return entity + def entity_cache(self, eid): + raise KeyError + def set_entity_cache(self, entity): + pass # url generation methods ################################################## def build_url(self, *args, **kwargs): diff -r d2874ddd4347 -r b0a2e779845c entity.py --- a/entity.py Sun Aug 02 11:58:55 2009 +0200 +++ b/entity.py Sun Aug 02 12:00:17 2009 +0200 @@ -647,7 +647,8 @@ if not self.is_saved(): return None rql = "Any A WHERE X eid %%(x)s, X %s A" % name - # XXX should we really use unsafe_execute here?? + # XXX should we really use unsafe_execute here? I think so (syt), + # see #344874 execute = getattr(self.req, 'unsafe_execute', self.req.execute) try: rset = execute(rql, {'x': self.eid}, 'x') @@ -681,7 +682,10 @@ pass assert self.has_eid() rql = self.related_rql(rtype, role) - rset = self.req.execute(rql, {'x': self.eid}, 'x') + # XXX should we really use unsafe_execute here? I think so (syt), + # see #344874 + execute = getattr(self.req, 'unsafe_execute', self.req.execute) + rset = execute(rql, {'x': self.eid}, 'x') self.set_related_cache(rtype, role, rset) return self.related(rtype, role, limit, entities) @@ -787,7 +791,7 @@ def relation_cached(self, rtype, role): """return true if the given relation is already cached on the instance """ - return '%s_%s' % (rtype, role) in self._related_cache + return self._related_cache.get('%s_%s' % (rtype, role)) def related_cache(self, rtype, role, entities=True, limit=None): """return values for the given relation if it's cached on the instance, diff -r d2874ddd4347 -r b0a2e779845c rset.py --- a/rset.py Sun Aug 02 11:58:55 2009 +0200 +++ b/rset.py Sun Aug 02 12:00:17 2009 +0200 @@ -347,7 +347,7 @@ raise NotAnEntity(etype) return self._build_entity(row, col) - def _build_entity(self, row, col, _localcache=None): + def _build_entity(self, row, col): """internal method to get a single entity, returns a partially initialized Entity instance. @@ -370,14 +370,7 @@ # XXX should we consider updating a cached entity with possible # new attributes found in this resultset ? try: - if hasattr(req, 'is_super_session'): - # this is a Session object which is not caching entities, so we - # have to use a local cache to avoid recursion pb - if _localcache is None: - _localcache = {} - return _localcache[eid] - else: - return req.entity_cache(eid) + return req.entity_cache(eid) except KeyError: pass # build entity instance @@ -385,8 +378,6 @@ entity = self.vreg.etype_class(etype)(req, self, row, col) entity.set_eid(eid) # cache entity - if _localcache is not None: - _localcache[eid] = entity req.set_entity_cache(entity) eschema = entity.e_schema # try to complete the entity if there are some additional columns @@ -420,7 +411,7 @@ rrset = ResultSet([], rql % (attr, entity.eid)) req.decorate_rset(rrset) else: - rrset = self._build_entity(row, i, _localcache).as_rset() + rrset = self._build_entity(row, i).as_rset() entity.set_related_cache(attr, x, rrset) return entity diff -r d2874ddd4347 -r b0a2e779845c server/hookhelper.py --- a/server/hookhelper.py Sun Aug 02 11:58:55 2009 +0200 +++ b/server/hookhelper.py Sun Aug 02 12:00:17 2009 +0200 @@ -17,9 +17,7 @@ def entity_attr(session, eid, attr): """return an arbitrary attribute of the entity with the given eid""" - rset = session.execute('Any N WHERE X eid %%(x)s, X %s N' % attr, - {'x': eid}, 'x') - return rset[0][0] + return getattr(session.entity_from_eid(eid), attr) def rproperty(session, rtype, eidfrom, eidto, rprop): rschema = session.repo.schema[rtype] diff -r d2874ddd4347 -r b0a2e779845c server/repository.py --- a/server/repository.py Sun Aug 02 11:58:55 2009 +0200 +++ b/server/repository.py Sun Aug 02 12:00:17 2009 +0200 @@ -997,11 +997,10 @@ for attr in entity.keys(): rschema = eschema.subject_relation(attr) if not rschema.is_final(): # inlined relation - entity.set_related_cache(attr, 'subject', - entity.req.eid_rset(entity[attr])) relations.append((attr, entity[attr])) if source.should_call_hooks: self.hm.call_hooks('before_add_entity', etype, session, entity) + entity.edited_attributes = entity.keys() entity.set_defaults() entity.check(creation=True) source.add_entity(session, entity) @@ -1012,6 +1011,21 @@ extid = None self.add_info(session, entity, source, extid, complete=False) entity._is_saved = True # entity has an eid and is saved + # prefill entity relation caches + session.set_entity_cache(entity) + for rschema in eschema.subject_relations(): + rtype = str(rschema) + if rtype in VIRTUAL_RTYPES: + continue + if rschema.is_final(): + entity.setdefault(rtype, None) + else: + entity.set_related_cache(rtype, 'subject', session.empty_rset()) + for rschema in eschema.object_relations(): + rtype = str(rschema) + if rtype in VIRTUAL_RTYPES: + continue + entity.set_related_cache(rtype, 'object', session.empty_rset()) # trigger after_add_entity after after_add_relation if source.should_call_hooks: self.hm.call_hooks('after_add_entity', etype, session, entity) @@ -1019,6 +1033,7 @@ for attr, value in relations: self.hm.call_hooks('before_add_relation', attr, session, entity.eid, attr, value) + session.update_rel_cache_add(entity.eid, attr, value) self.hm.call_hooks('after_add_relation', attr, session, entity.eid, attr, value) return entity.eid @@ -1032,6 +1047,7 @@ print 'UPDATE entity', etype, entity.eid, dict(entity) entity.check() eschema = entity.e_schema + session.set_entity_cache(entity) only_inline_rels, need_fti_update = True, False relations = [] for attr in entity.keys(): @@ -1047,10 +1063,12 @@ previous_value = entity.related(attr) if previous_value: previous_value = previous_value[0][0] # got a result set - self.hm.call_hooks('before_delete_relation', attr, session, - entity.eid, attr, previous_value) - entity.set_related_cache(attr, 'subject', - entity.req.eid_rset(entity[attr])) + if previous_value == entity[attr]: + previous_value = None + else: + self.hm.call_hooks('before_delete_relation', attr, + session, entity.eid, attr, + previous_value) relations.append((attr, entity[attr], previous_value)) source = self.source_from_eid(entity.eid, session) if source.should_call_hooks: @@ -1072,10 +1090,19 @@ entity) if source.should_call_hooks: for attr, value, prevvalue in relations: + # if the relation is already cached, update existant cache + relcache = entity.relation_cached(attr, 'subject') if prevvalue: self.hm.call_hooks('after_delete_relation', attr, session, entity.eid, attr, prevvalue) + if relcache is not None: + session.update_rel_cache_del(entity.eid, attr, prevvalue) del_existing_rel_if_needed(session, entity.eid, attr, value) + if relcache is not None: + session.update_rel_cache_add(entity.eid, attr, value) + else: + entity.set_related_cache(attr, 'subject', + session.eid_rset(value)) self.hm.call_hooks('after_add_relation', attr, session, entity.eid, attr, value) @@ -1086,6 +1113,8 @@ etype, uri, extid = self.type_and_source_from_eid(eid, session) if server.DEBUG & server.DBG_REPO: print 'DELETE entity', etype, eid + if eid == 937: + server.DEBUG |= (server.DBG_SQL | server.DBG_RQL | server.DBG_MORE) source = self.sources_by_uri[uri] if source.should_call_hooks: self.hm.call_hooks('before_delete_entity', etype, session, eid) @@ -1105,6 +1134,8 @@ self.hm.call_hooks('before_add_relation', rtype, session, subject, rtype, object) source.add_relation(session, subject, rtype, object) + rschema = self.schema.rschema(rtype) + session.update_rel_cache_add(subject, rtype, object, rschema.symetric) if source.should_call_hooks: self.hm.call_hooks('after_add_relation', rtype, session, subject, rtype, object) @@ -1118,7 +1149,9 @@ self.hm.call_hooks('before_delete_relation', rtype, session, subject, rtype, object) source.delete_relation(session, subject, rtype, object) - if self.schema.rschema(rtype).symetric: + rschema = self.schema.rschema(rtype) + session.update_rel_cache_del(subject, rtype, object, rschema.symetric) + if rschema.symetric: # on symetric relation, we can't now in which sense it's # stored so try to delete both source.delete_relation(session, object, rtype, subject) diff -r d2874ddd4347 -r b0a2e779845c server/securityhooks.py --- a/server/securityhooks.py Sun Aug 02 11:58:55 2009 +0200 +++ b/server/securityhooks.py Sun Aug 02 12:00:17 2009 +0200 @@ -18,7 +18,11 @@ # ._default_set is only there on entity creation to indicate unspecified # attributes which has been set to a default value defined in the schema defaults = getattr(entity, '_default_set', ()) - for attr in entity.keys(): + try: + editedattrs = entity.edited_attributes + except AttributeError: + editedattrs = entity.keys() + for attr in editedattrs: if attr in defaults: continue rschema = eschema.subject_relation(attr) diff -r d2874ddd4347 -r b0a2e779845c server/session.py --- a/server/session.py Sun Aug 02 11:58:55 2009 +0200 +++ b/server/session.py Sun Aug 02 12:00:17 2009 +0200 @@ -88,6 +88,54 @@ finally: self.is_super_session = False + def update_rel_cache_add(self, subject, rtype, object, symetric=False): + self._update_entity_rel_cache_add(subject, rtype, 'subject', object) + if symetric: + self._update_entity_rel_cache_add(object, rtype, 'subject', subject) + else: + self._update_entity_rel_cache_add(object, rtype, 'object', subject) + + def update_rel_cache_del(self, subject, rtype, object, symetric=False): + self._update_entity_rel_cache_del(subject, rtype, 'subject', object) + if symetric: + self._update_entity_rel_cache_del(object, rtype, 'object', object) + else: + self._update_entity_rel_cache_del(object, rtype, 'object', subject) + + def _rel_cache(self, eid, rtype, role): + try: + entity = self.entity_cache(eid) + except KeyError: + return + return entity.relation_cached(rtype, role) + + def _update_entity_rel_cache_add(self, eid, rtype, role, targeteid): + rcache = self._rel_cache(eid, rtype, role) + if rcache is not None: + rset, entities = rcache + rset.rows.append([targeteid]) + if isinstance(rset.description, list): # else description not set + rset.description.append([self.describe(targeteid)[0]]) + rset.rowcount += 1 + targetentity = self.entity_from_eid(targeteid) + entities.append(targetentity) + + def _update_entity_rel_cache_del(self, eid, rtype, role, targeteid): + rcache = self._rel_cache(eid, rtype, role) + if rcache is not None: + rset, entities = rcache + for idx, row in enumerate(rset.rows): + if row[0] == targeteid: + break + else: + raise Exception('cache inconsistency for %s %s %s %s' % + (eid, rtype, role, targeteid)) + del rset.rows[idx] + if isinstance(rset.description, list): # else description not set + del rset.description[idx] + del entities[idx] + rset.rowcount -= 1 + # resource accessors ###################################################### def actual_session(self): @@ -221,12 +269,30 @@ # request interface ####################################################### def set_entity_cache(self, entity): - # no entity cache in the server, too high risk of inconsistency - # between pre/post hooks - pass + # XXX session level caching may be a pb with multiple repository + # instances, but 1. this is probably not the only one :$ and 2. it + # may be an acceptable risk. Anyway we could activate it or not + # according to a configuration option + try: + self.transaction_data['ecache'].setdefault(entity.eid, entity) + except KeyError: + self.transaction_data['ecache'] = ecache = {} + ecache[entity.eid] = entity def entity_cache(self, eid): - raise KeyError(eid) + try: + return self.transaction_data['ecache'][eid] + except: + raise + + def cached_entities(self): + return self.transaction_data.get('ecache', {}).values() + + def drop_entity_cache(self, eid=None): + if eid is None: + self.transaction_data.pop('ecache', None) + else: + del self.transaction_data['ecache'][eid] def base_url(self): url = self.repo.config['base-url'] @@ -276,7 +342,7 @@ self.set_pool() return csession - def unsafe_execute(self, rql, kwargs=None, eid_key=None, build_descr=False, + def unsafe_execute(self, rql, kwargs=None, eid_key=None, build_descr=True, propagate=False): """like .execute but with security checking disabled (this method is internal to the server, it's not part of the db-api)