[repo] Extract rql cache handling to a dedicated class
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Fri, 03 Mar 2017 13:09:11 +0100
changeset 12060 0cdf5fafd234
parent 12059 72724cf53b1d
child 12061 94ae25593c38
[repo] Extract rql cache handling to a dedicated class
cubicweb/devtools/__init__.py
cubicweb/devtools/repotest.py
cubicweb/server/querier.py
cubicweb/server/repository.py
cubicweb/server/test/unittest_security.py
cubicweb/sobjects/services.py
cubicweb/test/unittest_rqlrewrite.py
--- a/cubicweb/devtools/__init__.py	Thu Mar 16 17:25:07 2017 +0100
+++ b/cubicweb/devtools/__init__.py	Fri Mar 03 13:09:11 2017 +0100
@@ -125,7 +125,7 @@
         for cnxset in repo.cnxsets:
             cnxset.reconnect()
         repo._type_cache = {}
-        repo.querier._rql_cache = {}
+        repo.querier.set_schema(repo.schema)
         repo.system_source.reset_caches()
         repo._needs_refresh = False
 
--- a/cubicweb/devtools/repotest.py	Thu Mar 16 17:25:07 2017 +0100
+++ b/cubicweb/devtools/repotest.py	Fri Mar 03 13:09:11 2017 +0100
@@ -277,8 +277,8 @@
         undo_monkey_patch()
 
     def _prepare_plan(self, cnx, rql, kwargs=None):
-        rqlst = self.o.parse(rql, annotate=True)
-        self.o.solutions(cnx, rqlst, kwargs)
+        rqlst = self.repo.vreg.rqlhelper.parse(rql, annotate=True)
+        self.repo.vreg.solutions(cnx, rqlst, kwargs)
         if rqlst.TYPE == 'select':
             self.repo.vreg.rqlhelper.annotate(rqlst)
             for select in rqlst.children:
--- a/cubicweb/server/querier.py	Thu Mar 16 17:25:07 2017 +0100
+++ b/cubicweb/server/querier.py	Fri Mar 03 13:09:11 2017 +0100
@@ -478,33 +478,14 @@
     def set_schema(self, schema):
         self.schema = schema
         repo = self._repo
-        # rql st and solution cache.
-        self._rql_cache = QueryCache(repo.config['rql-cache-size'])
-        # rql cache key cache. Don't bother using a Cache instance: we should
-        # have a limited number of queries in there, since there are no entries
-        # in this cache for user queries (which have no args)
-        self._rql_ck_cache = {}
-        # some cache usage stats
-        self.cache_hit, self.cache_miss = 0, 0
-        # rql parsing / analysing helper
-        self.solutions = repo.vreg.solutions
+        self.rql_cache = RQLCache(repo, schema)
         rqlhelper = repo.vreg.rqlhelper
-        # set backend on the rql helper, will be used for function checking
-        rqlhelper.backend = repo.config.system_source_config['db-driver']
-        self._parse = rqlhelper.parse
         self._annotate = rqlhelper.annotate
         # rql planner
         self._planner = SSPlanner(schema, rqlhelper)
         # sql generation annotator
         self.sqlgen_annotate = SQLGenAnnotator(schema).annotate
 
-    def parse(self, rql, annotate=False):
-        """return a rql syntax tree for the given rql"""
-        try:
-            return self._parse(text_type(rql), annotate=annotate)
-        except UnicodeError:
-            raise RQLSyntaxError(rql)
-
     def plan_factory(self, rqlst, args, cnx):
         """create an execution plan for an INSERT RQL query"""
         if rqlst.TYPE == 'insert':
@@ -535,44 +516,12 @@
             if server.DEBUG & (server.DBG_MORE | server.DBG_SQL):
                 print('*'*80)
             print('querier input', repr(rql), repr(args))
-        # parse the query and binds variables
-        cachekey = (rql,)
         try:
-            if args:
-                # search for named args in query which are eids (hence
-                # influencing query's solutions)
-                eidkeys = self._rql_ck_cache[rql]
-                if eidkeys:
-                    # if there are some, we need a better cache key, eg (rql +
-                    # entity type of each eid)
-                    try:
-                        cachekey = _rql_cache_key(cnx, rql, args, eidkeys)
-                    except UnknownEid:
-                        # we want queries such as "Any X WHERE X eid 9999"
-                        # return an empty result instead of raising UnknownEid
-                        return empty_rset(rql, args)
-            rqlst = self._rql_cache[cachekey]
-            self.cache_hit += 1
-            statsd_c('cache_hit')
-        except KeyError:
-            self.cache_miss += 1
-            statsd_c('cache_miss')
-            rqlst = self.parse(rql)
-            try:
-                # compute solutions for rqlst and return named args in query
-                # which are eids. Notice that if you may not need `eidkeys`, we
-                # have to compute solutions anyway (kept as annotation on the
-                # tree)
-                eidkeys = self.solutions(cnx, rqlst, args)
-            except UnknownEid:
-                # we want queries such as "Any X WHERE X eid 9999" return an
-                # empty result instead of raising UnknownEid
-                return empty_rset(rql, args)
-            if args and rql not in self._rql_ck_cache:
-                self._rql_ck_cache[rql] = eidkeys
-                if eidkeys:
-                    cachekey = _rql_cache_key(cnx, rql, args, eidkeys)
-            self._rql_cache[cachekey] = rqlst
+            rqlst, cachekey = self.rql_cache.get(cnx, rql, args)
+        except UnknownEid:
+            # we want queries such as "Any X WHERE X eid 9999"
+            # return an empty result instead of raising UnknownEid
+            return empty_rset(rql, args)
         if rqlst.TYPE != 'select':
             if cnx.read_security:
                 check_no_password_selected(rqlst)
@@ -645,6 +594,74 @@
     info = warning = error = critical = exception = debug = lambda msg,*a,**kw: None
 
 
+class RQLCache(object):
+
+    def __init__(self, repo, schema):
+        # rql st and solution cache.
+        self._cache = QueryCache(repo.config['rql-cache-size'])
+        # rql cache key cache. Don't bother using a Cache instance: we should
+        # have a limited number of queries in there, since there are no entries
+        # in this cache for user queries (which have no args)
+        self._ck_cache = {}
+        # some cache usage stats
+        self.cache_hit, self.cache_miss = 0, 0
+        # rql parsing / analysing helper
+        self.solutions = repo.vreg.solutions
+        rqlhelper = repo.vreg.rqlhelper
+        # set backend on the rql helper, will be used for function checking
+        rqlhelper.backend = repo.config.system_source_config['db-driver']
+
+        def parse(rql, annotate=False, parse=rqlhelper.parse):
+            """Return a freshly parsed syntax tree for the given RQL."""
+            try:
+                return parse(text_type(rql), annotate=annotate)
+            except UnicodeError:
+                raise RQLSyntaxError(rql)
+        self._parse = parse
+
+    def __len__(self):
+        return len(self._cache)
+
+    def get(self, cnx, rql, args):
+        """Return syntax tree and cache key for the given RQL.
+
+        Returned syntax tree is cached and must not be modified
+        """
+        # parse the query and bind variables
+        cachekey = (rql,)
+        try:
+            if args:
+                # search for named args in query which are eids (hence
+                # influencing query's solutions)
+                eidkeys = self._ck_cache[rql]
+                if eidkeys:
+                    # if there are some, we need a better cache key, eg (rql +
+                    # entity type of each eid)
+                    cachekey = _rql_cache_key(cnx, rql, args, eidkeys)
+            rqlst = self._cache[cachekey]
+            self.cache_hit += 1
+            statsd_c('cache_hit')
+        except KeyError:
+            self.cache_miss += 1
+            statsd_c('cache_miss')
+            rqlst = self._parse(rql)
+            # compute solutions for rqlst and return named args in query
+            # which are eids. Notice that even if you do not need `eidkeys`,
+            # we have to compute solutions anyway (kept as annotation on the
+            # tree)
+            eidkeys = self.solutions(cnx, rqlst, args)
+            if args and rql not in self._ck_cache:
+                self._ck_cache[rql] = eidkeys
+                if eidkeys:
+                    cachekey = _rql_cache_key(cnx, rql, args, eidkeys)
+            self._cache[cachekey] = rqlst
+        return rqlst, cachekey
+
+    def pop(self, key, *args):
+        """Pop a key from the cache."""
+        self._cache.pop(key, *args)
+
+
 def _rql_cache_key(cnx, rql, args, eidkeys):
     cachekey = [rql]
     type_from_eid = cnx.repo.type_from_eid
--- a/cubicweb/server/repository.py	Thu Mar 16 17:25:07 2017 +0100
+++ b/cubicweb/server/repository.py	Fri Mar 03 13:09:11 2017 +0100
@@ -436,7 +436,7 @@
             thread.join()
             self.info('thread %s finished', thread.getName())
         self.cnxsets.close()
-        hits, misses = self.querier.cache_hit, self.querier.cache_miss
+        hits, misses = self.querier.rql_cache.cache_hit, self.querier.rql_cache.cache_miss
         try:
             self.info('rql st cache hit/miss: %s/%s (%s%% hits)', hits, misses,
                       (hits * 100) / (hits + misses))
@@ -662,7 +662,7 @@
 
     def clear_caches(self, eids):
         etcache = self._type_cache
-        rqlcache = self.querier._rql_cache
+        rqlcache = self.querier.rql_cache
         for eid in eids:
             try:
                 etype = etcache.pop(int(eid))  # may be a string in some cases
--- a/cubicweb/server/test/unittest_security.py	Thu Mar 16 17:25:07 2017 +0100
+++ b/cubicweb/server/test/unittest_security.py	Fri Mar 03 13:09:11 2017 +0100
@@ -521,9 +521,9 @@
         with self.temporary_permissions(Division={'read': ('managers',
                                                            ERQLExpression('X owned_by U'))}):
             with self.new_access(u'iaminusersgrouponly').repo_cnx() as cnx:
+                rqlst = self.repo.vreg.rqlhelper.parse('Any X WHERE X is_instance_of Societe')
+                self.repo.vreg.solutions(cnx, rqlst, {})
                 querier = cnx.repo.querier
-                rqlst = querier.parse('Any X WHERE X is_instance_of Societe')
-                querier.solutions(cnx, rqlst, {})
                 querier._annotate(rqlst)
                 plan = querier.plan_factory(rqlst, {}, cnx)
                 plan.preprocess(rqlst)
--- a/cubicweb/sobjects/services.py	Thu Mar 16 17:25:07 2017 +0100
+++ b/cubicweb/sobjects/services.py	Fri Mar 03 13:09:11 2017 +0100
@@ -39,8 +39,8 @@
         querier = repo.querier
         source = repo.system_source
         for size, maxsize, hits, misses, title in (
-            (len(querier._rql_cache), repo.config['rql-cache-size'],
-             querier.cache_hit, querier.cache_miss, 'rqlt_st'),
+            (len(querier.rql_cache), repo.config['rql-cache-size'],
+             querier.rql_cache.cache_hit, querier.rql_cache.cache_miss, 'rqlt_st'),
             (len(source._cache), repo.config['rql-cache-size'],
              source.cache_hit, source.cache_miss, 'sql'),
         ):
--- a/cubicweb/test/unittest_rqlrewrite.py	Thu Mar 16 17:25:07 2017 +0100
+++ b/cubicweb/test/unittest_rqlrewrite.py	Fri Mar 03 13:09:11 2017 +0100
@@ -512,9 +512,9 @@
         if args is None:
             args = {}
         querier = self.repo.querier
-        union = querier.parse(rql)
+        union = parse(rql) # self.vreg.parse(rql, annotate=True)
         with self.admin_access.repo_cnx() as cnx:
-            querier.solutions(cnx, union, args)
+            self.vreg.solutions(cnx, union, args)
             querier._annotate(union)
             plan = querier.plan_factory(union, args, cnx)
             plan.preprocess(union)