[fti] allow usage of custom RQL to fetch entities to index (closes #2410509)
author Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
Fri, 06 Jul 2012 09:00:33 +0200
changeset 8450 11063635c4e4
parent 8449 cc83a3f16c0f
child 8454 113184eb4e06
[fti] allow usage of custom RQL to fetch entities to index (closes #2410509)
entities/__init__.py
entities/test/unittest_base.py
server/checkintegrity.py
--- a/entities/__init__.py	Fri Jul 06 09:00:32 2012 +0200
+++ b/entities/__init__.py	Fri Jul 06 09:00:33 2012 +0200
@@ -40,6 +40,24 @@
         """ return the url of the entity creation form for this entity type"""
         return req.build_url('add/%s' % cls.__regid__, **kwargs)
 
+    @classmethod
+    def cw_fti_index_rql_queries(cls, req):
+        """return the list of rql queries to fetch entities to FT-index
+
+        The default is to fetch all entities at once and to prefetch
+        indexable attributes but one could imagine iterating over
+        "smaller" resultsets if the table is very big or returning
+        a subset of entities that match some business-logic condition.
+        """
+        restrictions = ['X is %s' % cls.__regid__]
+        selected = ['X']
+        for attrschema in cls.e_schema.indexable_attributes():
+            varname = attrschema.type.upper()
+            restrictions.append('X %s %s' % (attrschema, varname))
+            selected.append(varname)
+        return ['Any %s WHERE %s' % (', '.join(selected),
+                                     ', '.join(restrictions))]
+
     # meta data api ###########################################################
 
     def dc_title(self):
--- a/entities/test/unittest_base.py	Fri Jul 06 09:00:32 2012 +0200
+++ b/entities/test/unittest_base.py	Fri Jul 06 09:00:33 2012 +0200
@@ -19,6 +19,7 @@
 """unit tests for cubicweb.entities.base module
 
 """
+from __future__ import with_statement
 
 from logilab.common.testlib import unittest_main
 from logilab.common.decorators import clear_cache
@@ -57,6 +58,12 @@
         self.assertEqual(dict((str(k), v) for k, v in self.schema['State'].meta_attributes().iteritems()),
                           {'description_format': ('format', 'description')})
 
+    def test_fti_rql_method(self):
+        eclass = self.vreg['etypes'].etype_class('EmailAddress')
+        self.assertEqual(['Any X, ALIAS, ADDRESS WHERE X is EmailAddress, '
+                          'X alias ALIAS, X address ADDRESS'],
+                         eclass.cw_fti_index_rql_queries(self.request()))
+
 
 class EmailAddressTC(BaseEntityTC):
     def test_canonical_form(self):
--- a/server/checkintegrity.py	Fri Jul 06 09:00:32 2012 +0200
+++ b/server/checkintegrity.py	Fri Jul 06 09:00:33 2012 +0200
@@ -134,10 +134,12 @@
     # attribute to their current value
     source = repo.system_source
     for eschema in etypes:
-        rset = session.execute('Any X WHERE X is %s' % eschema)
-        source.fti_index_entities(session, rset.entities())
-        # clear entity cache to avoid high memory consumption on big tables
-        session.drop_entity_cache()
+        etype_class = session.vreg['etypes'].etype_class(str(eschema))
+        for fti_rql in etype_class.cw_fti_index_rql_queries(session):
+            rset = session.execute(fti_rql)
+            source.fti_index_entities(session, rset.entities())
+            # clear entity cache to avoid high memory consumption on big tables
+            session.drop_entity_cache()
         if withpb:
             pb.update()