--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/misc/migration/3.14.0_Any.py Fri Oct 14 10:33:31 2011 +0200
@@ -0,0 +1,2 @@
+config['rql-cache-size'] = config['rql-cache-size'] * 10  # 3.14.0 migration: scale the configured size tenfold (this changeset moves the rql/sql caches to QueryCache)
+
--- a/server/querier.py Fri Oct 14 09:21:45 2011 +0200
+++ b/server/querier.py Fri Oct 14 10:33:31 2011 +0200
@@ -25,7 +25,6 @@
from itertools import repeat
-from logilab.common.cache import Cache
from logilab.common.compat import any
from rql import RQLSyntaxError
from rql.stmts import Union, Select
@@ -36,6 +35,7 @@
from cubicweb import server, typed_eid
from cubicweb.rset import ResultSet
+from cubicweb.utils import QueryCache
from cubicweb.server.utils import cleanup_solutions
from cubicweb.server.rqlannotation import SQLGenAnnotator, set_qdata
from cubicweb.server.ssplanner import READ_ONLY_RTYPES, add_types_restriction
@@ -599,7 +599,7 @@
self.schema = schema
repo = self._repo
# rql st and solution cache.
- self._rql_cache = Cache(repo.config['rql-cache-size'])
+ self._rql_cache = QueryCache(repo.config['rql-cache-size'])
# rql cache key cache. Don't bother using a Cache instance: we should
# have a limited number of queries in there, since there are no entries
# in this cache for user queries (which have no args)
--- a/server/sources/native.py Fri Oct 14 09:21:45 2011 +0200
+++ b/server/sources/native.py Fri Oct 14 10:33:31 2011 +0200
@@ -46,7 +46,6 @@
import sys
from logilab.common.compat import any
-from logilab.common.cache import Cache
from logilab.common.decorators import cached, clear_cache
from logilab.common.configuration import Method
from logilab.common.shellutils import getlogin
@@ -58,6 +57,7 @@
from cubicweb import (UnknownEid, AuthenticationError, ValidationError, Binary,
UniqueTogetherError)
from cubicweb import transaction as tx, server, neg_role
+from cubicweb.utils import QueryCache
from cubicweb.schema import VIRTUAL_RTYPES
from cubicweb.cwconfig import CubicWebNoAppConfiguration
from cubicweb.server import hook
@@ -295,7 +295,7 @@
# full text index helper
self.do_fti = not repo.config['delay-full-text-indexation']
# sql queries cache
- self._cache = Cache(repo.config['rql-cache-size'])
+ self._cache = QueryCache(repo.config['rql-cache-size'])
self._temp_table_data = {}
# we need a lock to protect eid attribution function (XXX, really?
# explain)
@@ -343,7 +343,7 @@
def reset_caches(self):
"""method called during test to reset potential source caches"""
- self._cache = Cache(self.repo.config['rql-cache-size'])
+ self._cache = QueryCache(self.repo.config['rql-cache-size'])
def clear_eid_cache(self, eid, etype):
"""clear potential caches for the given eid"""
@@ -463,7 +463,7 @@
def set_schema(self, schema):
"""set the instance'schema"""
- self._cache = Cache(self.repo.config['rql-cache-size'])
+ self._cache = QueryCache(self.repo.config['rql-cache-size'])
self.cache_hit, self.cache_miss, self.no_cache = 0, 0, 0
self.schema = schema
try:
--- a/test/unittest_utils.py Fri Oct 14 09:21:45 2011 +0200
+++ b/test/unittest_utils.py Fri Oct 14 10:33:31 2011 +0200
@@ -26,7 +26,7 @@
from cubicweb.devtools.testlib import CubicWebTC
from cubicweb.utils import (make_uid, UStringIO, SizeConstrainedList,
- RepeatList, HTMLHead)
+ RepeatList, HTMLHead, QueryCache)
from cubicweb.entity import Entity
try:
@@ -50,6 +50,55 @@
'some numeric character, got %s' % uid)
d.add(uid)
+class TestQueryCache(TestCase):
+ def test_querycache(self):
+ c = QueryCache(ceiling=20)
+ # write only: below the ceiling nothing is evicted, and without reads no usage is tracked
+ for x in xrange(10):
+ c[x] = x
+ self.assertEqual(c._usage_report(),
+ {'transientcount': 0,
+ 'itemcount': 10,
+ 'permanentcount': 0})
+ c = QueryCache(ceiling=10)
+ # write-only overflow: the 11th write finds a full cache with no transient key, so it is wiped (with a warning); keys 10..19 remain
+ for x in xrange(20):
+ c[x] = x
+ self.assertEqual(c._usage_report(),
+ {'transientcount': 0,
+ 'itemcount': 10,
+ 'permanentcount': 0})
+ # write + reads: 4 reads per key are counted but stay below the promotion threshold
+ c = QueryCache(ceiling=20)
+ for n in xrange(4):
+ for x in xrange(10):
+ c[x] = x
+ c[x]
+ self.assertEqual(c._usage_report(),
+ {'transientcount': 10,
+ 'itemcount': 10,
+ 'permanentcount': 0})
+ c = QueryCache(ceiling=20)
+ for n in xrange(17):  # 17 reads per key: the 17th read sees a counter above _maxlevel and promotes the key
+ for x in xrange(10):
+ c[x] = x
+ c[x]
+ self.assertEqual(c._usage_report(),
+ {'transientcount': 0,
+ 'itemcount': 10,
+ 'permanentcount': 10})
+ c = QueryCache(ceiling=20)
+ for n in xrange(17):  # mixed usage: odd keys get 25 reads (promoted), even keys only 8 (still transient)
+ for x in xrange(10):
+ c[x] = x
+ if n % 2:
+ c[x]
+ if x % 2:
+ c[x]
+ self.assertEqual(c._usage_report(),
+ {'transientcount': 5,
+ 'itemcount': 10,
+ 'permanentcount': 5})
class UStringIOTC(TestCase):
def test_boolean_value(self):
--- a/utils.py Fri Oct 14 09:21:45 2011 +0200
+++ b/utils.py Fri Oct 14 10:33:31 2011 +0200
@@ -16,18 +16,21 @@
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
"""Some utilities for CubicWeb server/clients."""
-
+from __future__ import division
__docformat__ = "restructuredtext en"
-import os
import sys
import decimal
import datetime
import random
+from operator import itemgetter
from inspect import getargspec
from itertools import repeat
from uuid import uuid4
from warnings import warn
+from threading import Lock
+
+from logging import getLogger
from logilab.mtconverter import xml_escape
from logilab.common.deprecation import deprecated
@@ -551,3 +554,125 @@
'strptime'):
msg = '[3.6] %s has been moved to logilab.common.date' % funcname
_THIS_MOD_NS[funcname] = deprecated(msg)(getattr(date, funcname))
+
+
+logger = getLogger('cubicweb.utils')  # module-level logger; QueryCache uses it to report a full cache
+
+class QueryCache(object):
+ """A minimalist dict-like object to be used by the querier
+ and native source (replaces lgc.cache for this very usage).
+
+ To be efficient it must be used properly. The usage patterns are
+ quite specific to its current clients.
+
+ The ceiling value should be sufficiently high, else it will be
+ ruthlessly inefficient (there will be warnings when this happens).
+ A good (high enough) value can only be chosen on a per-application
+ basis. A default, reasonably high value is provided but tuning
+ e.g. `rql-cache-size` can certainly help.
+
+ There are two kinds of elements to put in this cache:
+ * frequently used elements
+ * occasional elements
+
+ The former should end up in the _permanent structure after some
+ warmup (a key is promoted on a read once its counter exceeds `_maxlevel`).
+
+ Occasional elements can be buggy requests (server-side) or
+ end-user (web-ui provided) requests. These have to be cleaned up
+ when they fill the cache, without evicting the useful, frequently
+ used entries.
+ """
+ # read-count threshold for promotion to permanent; quite arbitrary,
+ # but we never want to immortalize some seldom-used query
+ _maxlevel = 15
+
+ def __init__(self, ceiling=3000):
+ self._max = ceiling  # item count at which __setitem__ first tries to make room
+ # keys that eviction passes skip (dropped only by pop() or a full clear)
+ self._permanent = set()
+ # mapping of key (that can get wiped) to its __getitem__ call count
+ self._transient = {}
+ self._data = {}  # key -> cached value
+ self._lock = Lock()  # guards all three structures; not reentrant, hence the lock-free _clear()
+
+ def __len__(self):
+ with self._lock:
+ return len(self._data)
+
+ def __getitem__(self, k):  # raises KeyError on a miss, like a dict
+ with self._lock:
+ if k in self._permanent:
+ return self._data[k]
+ v = self._transient.get(k, _MARKER)  # _MARKER: sentinel presumably defined earlier in utils.py (not visible in this hunk)
+ if v is _MARKER:
+ self._transient[k] = 1  # first lookup; NOTE: counted even when k is absent from _data (KeyError on the next line)
+ return self._data[k]
+ if v > self._maxlevel:  # read often enough: promote to permanent and stop counting
+ self._permanent.add(k)
+ self._transient.pop(k, None)
+ else:
+ self._transient[k] += 1
+ return self._data[k]
+
+ def __setitem__(self, k, v):
+ with self._lock:
+ if len(self._data) >= self._max:  # at/over the ceiling: evict before storing (also when k is already cached)
+ self._try_to_make_room()
+ self._data[k] = v
+
+ def pop(self, key, default=_MARKER):  # dict.pop semantics: KeyError when key is missing and no default is given
+ with self._lock:
+ try:
+ if default is _MARKER:
+ return self._data.pop(key)
+ return self._data.pop(key, default)
+ finally:  # forget the usage bookkeeping whether or not the key was found
+ if key in self._permanent:
+ self._permanent.remove(key)
+ else:
+ self._transient.pop(key, None)
+
+ def clear(self):  # locked public entry point; the actual reset is in _clear()
+ with self._lock:
+ self._clear()
+
+ def _clear(self):  # caller must hold self._lock
+ self._permanent = set()
+ self._transient = {}
+ self._data = {}
+
+ def _try_to_make_room(self):  # caller must hold self._lock (called from __setitem__)
+ current_size = len(self._data)
+ items = sorted(self._transient.items(), key=itemgetter(1))  # least-read transient keys first
+ level = 0
+ for k, v in items:
+ self._data.pop(k, None)
+ self._transient.pop(k, None)
+ if v > level:  # reached a higher read count: check how much has been freed so far
+ datalen = len(self._data)
+ if datalen == 0:
+ return
+ if (current_size - datalen) / datalen > .1:  # true division; freed more than 10% of what remains: enough
+ break
+ level = v
+ else:
+ # we removed cruft but everything is permanent; if still full, warn and wipe it all
+ if len(self._data) >= self._max:
+ logger.warning('Cache %s is full.' % id(self))
+ self._clear()
+
+ def _usage_report(self):  # snapshot of the three structure sizes (used by the unit test)
+ with self._lock:
+ return {'itemcount': len(self._data),
+ 'transientcount': len(self._transient),
+ 'permanentcount': len(self._permanent)}
+
+ def popitem(self):  # unsupported dict method
+ raise NotImplementedError()
+
+ def setdefault(self, key, default=None):  # unsupported dict method
+ raise NotImplementedError()
+
+ def update(self, other):  # unsupported dict method
+ raise NotImplementedError()