[cache] replace lgc.cache with something more appropriate (closes #1921713)
authorAurelien Campeas <aurelien.campeas@logilab.fr>
Fri, 14 Oct 2011 10:33:31 +0200
changeset 7954 a3d3220669d6
parent 7953 a37531c8a4a6
child 7955 f4c97d3c8b93
[cache] replace lgc.cache with something more appropriate (closes #1921713)
misc/migration/3.14.0_Any.py
server/querier.py
server/sources/native.py
test/unittest_utils.py
utils.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/misc/migration/3.14.0_Any.py	Fri Oct 14 10:33:31 2011 +0200
@@ -0,0 +1,2 @@
+config['rql-cache-size'] = config['rql-cache-size'] * 10
+
--- a/server/querier.py	Fri Oct 14 09:21:45 2011 +0200
+++ b/server/querier.py	Fri Oct 14 10:33:31 2011 +0200
@@ -25,7 +25,6 @@
 
 from itertools import repeat
 
-from logilab.common.cache import Cache
 from logilab.common.compat import any
 from rql import RQLSyntaxError
 from rql.stmts import Union, Select
@@ -36,6 +35,7 @@
 from cubicweb import server, typed_eid
 from cubicweb.rset import ResultSet
 
+from cubicweb.utils import QueryCache
 from cubicweb.server.utils import cleanup_solutions
 from cubicweb.server.rqlannotation import SQLGenAnnotator, set_qdata
 from cubicweb.server.ssplanner import READ_ONLY_RTYPES, add_types_restriction
@@ -599,7 +599,7 @@
         self.schema = schema
         repo = self._repo
         # rql st and solution cache.
-        self._rql_cache = Cache(repo.config['rql-cache-size'])
+        self._rql_cache = QueryCache(repo.config['rql-cache-size'])
         # rql cache key cache. Don't bother using a Cache instance: we should
         # have a limited number of queries in there, since there are no entries
         # in this cache for user queries (which have no args)
--- a/server/sources/native.py	Fri Oct 14 09:21:45 2011 +0200
+++ b/server/sources/native.py	Fri Oct 14 10:33:31 2011 +0200
@@ -46,7 +46,6 @@
 import sys
 
 from logilab.common.compat import any
-from logilab.common.cache import Cache
 from logilab.common.decorators import cached, clear_cache
 from logilab.common.configuration import Method
 from logilab.common.shellutils import getlogin
@@ -58,6 +57,7 @@
 from cubicweb import (UnknownEid, AuthenticationError, ValidationError, Binary,
                       UniqueTogetherError)
 from cubicweb import transaction as tx, server, neg_role
+from cubicweb.utils import QueryCache
 from cubicweb.schema import VIRTUAL_RTYPES
 from cubicweb.cwconfig import CubicWebNoAppConfiguration
 from cubicweb.server import hook
@@ -295,7 +295,7 @@
         # full text index helper
         self.do_fti = not repo.config['delay-full-text-indexation']
         # sql queries cache
-        self._cache = Cache(repo.config['rql-cache-size'])
+        self._cache = QueryCache(repo.config['rql-cache-size'])
         self._temp_table_data = {}
         # we need a lock to protect eid attribution function (XXX, really?
         # explain)
@@ -343,7 +343,7 @@
 
     def reset_caches(self):
         """method called during test to reset potential source caches"""
-        self._cache = Cache(self.repo.config['rql-cache-size'])
+        self._cache = QueryCache(self.repo.config['rql-cache-size'])
 
     def clear_eid_cache(self, eid, etype):
         """clear potential caches for the given eid"""
@@ -463,7 +463,7 @@
 
     def set_schema(self, schema):
         """set the instance'schema"""
-        self._cache = Cache(self.repo.config['rql-cache-size'])
+        self._cache = QueryCache(self.repo.config['rql-cache-size'])
         self.cache_hit, self.cache_miss, self.no_cache = 0, 0, 0
         self.schema = schema
         try:
--- a/test/unittest_utils.py	Fri Oct 14 09:21:45 2011 +0200
+++ b/test/unittest_utils.py	Fri Oct 14 10:33:31 2011 +0200
@@ -26,7 +26,7 @@
 
 from cubicweb.devtools.testlib import CubicWebTC
 from cubicweb.utils import (make_uid, UStringIO, SizeConstrainedList,
-                            RepeatList, HTMLHead)
+                            RepeatList, HTMLHead, QueryCache)
 from cubicweb.entity import Entity
 
 try:
@@ -50,6 +50,55 @@
                           'some numeric character, got %s' % uid)
             d.add(uid)
 
+class TestQueryCache(TestCase):
+    def test_querycache(self):
+        c = QueryCache(ceiling=20)
+        # write only
+        for x in xrange(10):
+            c[x] = x
+        self.assertEqual(c._usage_report(),
+                         {'transientcount': 0,
+                          'itemcount': 10,
+                          'permanentcount': 0})
+        c = QueryCache(ceiling=10)
+        # we should also get a warning
+        for x in xrange(20):
+            c[x] = x
+        self.assertEqual(c._usage_report(),
+                         {'transientcount': 0,
+                          'itemcount': 10,
+                          'permanentcount': 0})
+        # write + reads
+        c = QueryCache(ceiling=20)
+        for n in xrange(4):
+            for x in xrange(10):
+                c[x] = x
+                c[x]
+        self.assertEqual(c._usage_report(),
+                         {'transientcount': 10,
+                          'itemcount': 10,
+                          'permanentcount': 0})
+        c = QueryCache(ceiling=20)
+        for n in xrange(17):
+            for x in xrange(10):
+                c[x] = x
+                c[x]
+        self.assertEqual(c._usage_report(),
+                         {'transientcount': 0,
+                          'itemcount': 10,
+                          'permanentcount': 10})
+        c = QueryCache(ceiling=20)
+        for n in xrange(17):
+            for x in xrange(10):
+                c[x] = x
+                if n % 2:
+                    c[x]
+                if x % 2:
+                    c[x]
+        self.assertEqual(c._usage_report(),
+                         {'transientcount': 5,
+                          'itemcount': 10,
+                          'permanentcount': 5})
 
 class UStringIOTC(TestCase):
     def test_boolean_value(self):
--- a/utils.py	Fri Oct 14 09:21:45 2011 +0200
+++ b/utils.py	Fri Oct 14 10:33:31 2011 +0200
@@ -16,18 +16,21 @@
 # You should have received a copy of the GNU Lesser General Public License along
 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
 """Some utilities for CubicWeb server/clients."""
-
+from __future__ import division
 __docformat__ = "restructuredtext en"
 
-import os
 import sys
 import decimal
 import datetime
 import random
+from operator import itemgetter
 from inspect import getargspec
 from itertools import repeat
 from uuid import uuid4
 from warnings import warn
+from threading import Lock
+
+from logging import getLogger
 
 from logilab.mtconverter import xml_escape
 from logilab.common.deprecation import deprecated
@@ -551,3 +554,125 @@
                  'strptime'):
     msg = '[3.6] %s has been moved to logilab.common.date' % funcname
     _THIS_MOD_NS[funcname] = deprecated(msg)(getattr(date, funcname))
+
+
+logger = getLogger('cubicweb.utils')
+
+class QueryCache(object):
+    """ a minimalist dict-like object to be used by the querier
+    and native source (replaces lgc.cache for this very usage)
+
+    To be efficient it must be properly used. The usage patterns are
+    quite specific to its current clients.
+
+    The ceiling value should be sufficiently high, else it will be
+    ruthlessly inefficient (there will be warnings when this happens).
+    A good (high enough) value can only be set on a per-application
+    basis. A default, reasonably high value is provided but tuning
+    e.g. `rql-cache-size` can certainly help.
+
+    There are two kinds of elements to put in this cache:
+    * frequently used elements
+    * occasional elements
+
+    The former should finish in the _permanent structure after some
+    warmup.
+
+    Occasional elements can be buggy requests (server-side) or
+    end-user (web-ui provided) requests. These have to be cleaned up
+    when they fill the cache, without evicting the useful, frequently
+    used entries.
+    """
+    # quite arbitrary, but we want to never
+    # immortalize some use-a-little query
+    _maxlevel = 15
+
+    def __init__(self, ceiling=3000):
+        self._max = ceiling
+        # keys belonging forever to this cache
+        self._permanent = set()
+        # mapping of key (that can get wiped) to getitem count
+        self._transient = {}
+        self._data = {}
+        self._lock = Lock()
+
+    def __len__(self):
+        with self._lock:
+            return len(self._data)
+
+    def __getitem__(self, k):
+        with self._lock:
+            if k in self._permanent:
+                return self._data[k]
+            v = self._transient.get(k, _MARKER)
+            if v is _MARKER:
+                self._transient[k] = 1
+                return self._data[k]
+            if v > self._maxlevel:
+                self._permanent.add(k)
+                self._transient.pop(k, None)
+            else:
+                self._transient[k] += 1
+            return self._data[k]
+
+    def __setitem__(self, k, v):
+        with self._lock:
+            if len(self._data) >= self._max:
+                self._try_to_make_room()
+            self._data[k] = v
+
+    def pop(self, key, default=_MARKER):
+        with self._lock:
+            try:
+                if default is _MARKER:
+                    return self._data.pop(key)
+                return self._data.pop(key, default)
+            finally:
+                if key in self._permanent:
+                    self._permanent.remove(key)
+                else:
+                    self._transient.pop(key, None)
+
+    def clear(self):
+        with self._lock:
+            self._clear()
+
+    def _clear(self):
+        self._permanent = set()
+        self._transient = {}
+        self._data = {}
+
+    def _try_to_make_room(self):
+        current_size = len(self._data)
+        items = sorted(self._transient.items(), key=itemgetter(1))
+        level = 0
+        for k, v in items:
+            self._data.pop(k, None)
+            self._transient.pop(k, None)
+            if v > level:
+                datalen = len(self._data)
+                if datalen == 0:
+                    return
+                if (current_size - datalen) / datalen > .1:
+                    break
+                level = v
+        else:
+            # we removed cruft but everything is permanent
+            if len(self._data) >= self._max:
+                logger.warning('Cache %s is full.' % id(self))
+                self._clear()
+
+    def _usage_report(self):
+        with self._lock:
+            return {'itemcount': len(self._data),
+                    'transientcount': len(self._transient),
+                    'permanentcount': len(self._permanent)}
+
+    def popitem(self):
+        raise NotImplementedError()
+
+    def setdefault(self, key, default=None):
+        raise NotImplementedError()
+
+    def update(self, other):
+        raise NotImplementedError()