# HG changeset patch
# User Aurelien Campeas
# Date 1318581211 -7200
# Node ID a3d3220669d6ffc1f3b5e0d5241ac79ba482dd53
# Parent  a37531c8a4a6d4b990b9220d5d0db46c6485ca92
[cache] replace lgc.cache with something more appropriate (closes #1921713)

diff -r a37531c8a4a6 -r a3d3220669d6 misc/migration/3.14.0_Any.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/misc/migration/3.14.0_Any.py	Fri Oct 14 10:33:31 2011 +0200
@@ -0,0 +1,2 @@
+config['rql-cache-size'] = config['rql-cache-size'] * 10
+
diff -r a37531c8a4a6 -r a3d3220669d6 server/querier.py
--- a/server/querier.py	Fri Oct 14 09:21:45 2011 +0200
+++ b/server/querier.py	Fri Oct 14 10:33:31 2011 +0200
@@ -25,7 +25,6 @@
 
 from itertools import repeat
 
-from logilab.common.cache import Cache
 from logilab.common.compat import any
 from rql import RQLSyntaxError
 from rql.stmts import Union, Select
@@ -36,6 +35,7 @@
 
 from cubicweb import server, typed_eid
 from cubicweb.rset import ResultSet
+from cubicweb.utils import QueryCache
 from cubicweb.server.utils import cleanup_solutions
 from cubicweb.server.rqlannotation import SQLGenAnnotator, set_qdata
 from cubicweb.server.ssplanner import READ_ONLY_RTYPES, add_types_restriction
@@ -599,7 +599,7 @@
         self.schema = schema
         repo = self._repo
         # rql st and solution cache.
-        self._rql_cache = Cache(repo.config['rql-cache-size'])
+        self._rql_cache = QueryCache(repo.config['rql-cache-size'])
         # rql cache key cache. Don't bother using a Cache instance: we should
         # have a limited number of queries in there, since there are no entries
         # in this cache for user queries (which have no args)
diff -r a37531c8a4a6 -r a3d3220669d6 server/sources/native.py
--- a/server/sources/native.py	Fri Oct 14 09:21:45 2011 +0200
+++ b/server/sources/native.py	Fri Oct 14 10:33:31 2011 +0200
@@ -46,7 +46,6 @@
 import sys
 
 from logilab.common.compat import any
-from logilab.common.cache import Cache
 from logilab.common.decorators import cached, clear_cache
 from logilab.common.configuration import Method
 from logilab.common.shellutils import getlogin
@@ -58,6 +57,7 @@
 from cubicweb import (UnknownEid, AuthenticationError, ValidationError, Binary,
                       UniqueTogetherError)
 from cubicweb import transaction as tx, server, neg_role
+from cubicweb.utils import QueryCache
 from cubicweb.schema import VIRTUAL_RTYPES
 from cubicweb.cwconfig import CubicWebNoAppConfiguration
 from cubicweb.server import hook
@@ -295,7 +295,7 @@
         # full text index helper
         self.do_fti = not repo.config['delay-full-text-indexation']
         # sql queries cache
-        self._cache = Cache(repo.config['rql-cache-size'])
+        self._cache = QueryCache(repo.config['rql-cache-size'])
         self._temp_table_data = {}
         # we need a lock to protect eid attribution function (XXX, really?
         # explain)
@@ -343,7 +343,7 @@
 
     def reset_caches(self):
         """method called during test to reset potential source caches"""
-        self._cache = Cache(self.repo.config['rql-cache-size'])
+        self._cache = QueryCache(self.repo.config['rql-cache-size'])
 
     def clear_eid_cache(self, eid, etype):
         """clear potential caches for the given eid"""
@@ -463,7 +463,7 @@
 
     def set_schema(self, schema):
         """set the instance'schema"""
-        self._cache = Cache(self.repo.config['rql-cache-size'])
+        self._cache = QueryCache(self.repo.config['rql-cache-size'])
         self.cache_hit, self.cache_miss, self.no_cache = 0, 0, 0
         self.schema = schema
         try:
diff -r a37531c8a4a6 -r a3d3220669d6 test/unittest_utils.py
--- a/test/unittest_utils.py	Fri Oct 14 09:21:45 2011 +0200
+++ b/test/unittest_utils.py	Fri Oct 14 10:33:31 2011 +0200
@@ -26,7 +26,7 @@
 
 from cubicweb.devtools.testlib import CubicWebTC
 from cubicweb.utils import (make_uid, UStringIO, SizeConstrainedList,
-                            RepeatList, HTMLHead)
+                            RepeatList, HTMLHead, QueryCache)
 from cubicweb.entity import Entity
 
 try:
@@ -50,6 +50,55 @@
                           'some numeric character, got %s' % uid)
             d.add(uid)
 
+class TestQueryCache(TestCase):
+    def test_querycache(self):
+        c = QueryCache(ceiling=20)
+        # write only
+        for x in xrange(10):
+            c[x] = x
+        self.assertEqual(c._usage_report(),
+                         {'transientcount': 0,
+                          'itemcount': 10,
+                          'permanentcount': 0})
+        c = QueryCache(ceiling=10)
+        # we should also get a warning
+        for x in xrange(20):
+            c[x] = x
+        self.assertEqual(c._usage_report(),
+                         {'transientcount': 0,
+                          'itemcount': 10,
+                          'permanentcount': 0})
+        # write + reads
+        c = QueryCache(ceiling=20)
+        for n in xrange(4):
+            for x in xrange(10):
+                c[x] = x
+                c[x]
+        self.assertEqual(c._usage_report(),
+                         {'transientcount': 10,
+                          'itemcount': 10,
+                          'permanentcount': 0})
+        c = QueryCache(ceiling=20)
+        for n in xrange(17):
+            for x in xrange(10):
+                c[x] = x
+                c[x]
+        self.assertEqual(c._usage_report(),
+                         {'transientcount': 0,
+                          'itemcount': 10,
+                          'permanentcount': 10})
+        c = QueryCache(ceiling=20)
+        for n in xrange(17):
+            for x in xrange(10):
+                c[x] = x
+                if n % 2:
+                    c[x]
+                if x % 2:
+                    c[x]
+        self.assertEqual(c._usage_report(),
+                         {'transientcount': 5,
+                          'itemcount': 10,
+                          'permanentcount': 5})
 
 class UStringIOTC(TestCase):
     def test_boolean_value(self):
diff -r a37531c8a4a6 -r a3d3220669d6 utils.py
--- a/utils.py	Fri Oct 14 09:21:45 2011 +0200
+++ b/utils.py	Fri Oct 14 10:33:31 2011 +0200
@@ -16,18 +16,21 @@
 # You should have received a copy of the GNU Lesser General Public License along
 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
 """Some utilities for CubicWeb server/clients."""
-
+from __future__ import division
 __docformat__ = "restructuredtext en"
 
-import os
 import sys
 import decimal
 import datetime
 import random
+from operator import itemgetter
 from inspect import getargspec
 from itertools import repeat
 from uuid import uuid4
 from warnings import warn
+from threading import Lock
+
+from logging import getLogger
 
 from logilab.mtconverter import xml_escape
 from logilab.common.deprecation import deprecated
@@ -551,3 +554,125 @@
                  'strptime'):
     msg = '[3.6] %s has been moved to logilab.common.date' % funcname
     _THIS_MOD_NS[funcname] = deprecated(msg)(getattr(date, funcname))
+
+
+logger = getLogger('cubicweb.utils')
+
+class QueryCache(object):
+    """ a minimalist dict-like object to be used by the querier
+    and native source (replaces lgc.cache for this very usage)
+
+    To be efficient it must be properly used. The usage patterns are
+    quite specific to its current clients.
+
+    The ceiling value should be sufficiently high, else it will be
+    ruthlessly inefficient (there will be warnings when this happens).
+    A good (high enough) value can only be set on a per-application
+    value. A default, reasonably high value is provided but tuning
+    e.g `rql-cache-size` can certainly help.
+
+    There are two kinds of elements to put in this cache:
+    * frequently used elements
+    * occasional elements
+
+    The former should finish in the _permanent structure after some
+    warmup.
+
+    Occasional elements can be buggy requests (server-side) or
+    end-user (web-ui provided) requests. These have to be cleaned up
+    when they fill the cache, without evicting the useful, frequently
+    used entries.
+    """
+    # quite arbitrary, but we want to never
+    # immortalize some use-a-little query
+    _maxlevel = 15
+
+    def __init__(self, ceiling=3000):
+        self._max = ceiling
+        # keys belonging forever to this cache
+        self._permanent = set()
+        # mapping of key (that can get wiped) to getitem count
+        self._transient = {}
+        self._data = {}
+        self._lock = Lock()
+
+    def __len__(self):
+        with self._lock:
+            return len(self._data)
+
+    def __getitem__(self, k):
+        with self._lock:
+            if k in self._permanent:
+                return self._data[k]
+            v = self._transient.get(k, _MARKER)
+            if v is _MARKER:
+                self._transient[k] = 1
+                return self._data[k]
+            if v > self._maxlevel:
+                self._permanent.add(k)
+                self._transient.pop(k, None)
+            else:
+                self._transient[k] += 1
+            return self._data[k]
+
+    def __setitem__(self, k, v):
+        with self._lock:
+            if len(self._data) >= self._max:
+                self._try_to_make_room()
+            self._data[k] = v
+
+    def pop(self, key, default=_MARKER):
+        with self._lock:
+            try:
+                if default is _MARKER:
+                    return self._data.pop(key)
+                return self._data.pop(key, default)
+            finally:
+                if key in self._permanent:
+                    self._permanent.remove(key)
+                else:
+                    self._transient.pop(key, None)
+
+    def clear(self):
+        with self._lock:
+            self._clear()
+
+    def _clear(self):
+        self._permanent = set()
+        self._transient = {}
+        self._data = {}
+
+    def _try_to_make_room(self):
+        current_size = len(self._data)
+        items = sorted(self._transient.items(), key=itemgetter(1))
+        level = 0
+        for k, v in items:
+            self._data.pop(k, None)
+            self._transient.pop(k, None)
+            if v > level:
+                datalen = len(self._data)
+                if datalen == 0:
+                    return
+                if (current_size - datalen) / datalen > .1:
+                    break
+                level = v
+        else:
+            # we removed cruft but everything is permanent
+            if len(self._data) >= self._max:
+                logger.warning('Cache %s is full.' % id(self))
+                self._clear()
+
+    def _usage_report(self):
+        with self._lock:
+            return {'itemcount': len(self._data),
+                    'transientcount': len(self._transient),
+                    'permanentcount': len(self._permanent)}
+
+    def popitem(self):
+        raise NotImplementedError()
+
+    def setdefault(self, key, default=None):
+        raise NotImplementedError()
+
+    def update(self, other):
+        raise NotImplementedError()