# HG changeset patch # User Adrien Di Mascio # Date 1341558033 -7200 # Node ID 11063635c4e41e0e8f354860c93dfe2580e0f4ef # Parent cc83a3f16c0f178f19e31fcc720919e7914d8524 [fti] allow usage of custom RQL to fetch entities to index (closes #2410509) diff -r cc83a3f16c0f -r 11063635c4e4 entities/__init__.py --- a/entities/__init__.py Fri Jul 06 09:00:32 2012 +0200 +++ b/entities/__init__.py Fri Jul 06 09:00:33 2012 +0200 @@ -40,6 +40,24 @@ """ return the url of the entity creation form for this entity type""" return req.build_url('add/%s' % cls.__regid__, **kwargs) + @classmethod + def cw_fti_index_rql_queries(cls, req): + """return the list of rql queries to fetch entities to FT-index + + The default is to fetch all entities at once and to prefetch + indexable attributes but one could imagine iterating over + "smaller" resultsets if the table is very big or returning + a subset of entities that match some business-logic condition. + """ + restrictions = ['X is %s' % cls.__regid__] + selected = ['X'] + for attrschema in cls.e_schema.indexable_attributes(): + varname = attrschema.type.upper() + restrictions.append('X %s %s' % (attrschema, varname)) + selected.append(varname) + return ['Any %s WHERE %s' % (', '.join(selected), + ', '.join(restrictions))] + # meta data api ########################################################### def dc_title(self): diff -r cc83a3f16c0f -r 11063635c4e4 entities/test/unittest_base.py --- a/entities/test/unittest_base.py Fri Jul 06 09:00:32 2012 +0200 +++ b/entities/test/unittest_base.py Fri Jul 06 09:00:33 2012 +0200 @@ -19,6 +19,7 @@ """unit tests for cubicweb.entities.base module """ +from __future__ import with_statement from logilab.common.testlib import unittest_main from logilab.common.decorators import clear_cache @@ -57,6 +58,12 @@ self.assertEqual(dict((str(k), v) for k, v in self.schema['State'].meta_attributes().iteritems()), {'description_format': ('format', 'description')}) + def test_fti_rql_method(self): + eclass = self.vreg['etypes'].etype_class('EmailAddress') + self.assertEqual(['Any X, ALIAS, ADDRESS WHERE X is EmailAddress, ' + 'X alias ALIAS, X address ADDRESS'], + eclass.cw_fti_index_rql_queries(self.request())) + class EmailAddressTC(BaseEntityTC): def test_canonical_form(self): diff -r cc83a3f16c0f -r 11063635c4e4 server/checkintegrity.py --- a/server/checkintegrity.py Fri Jul 06 09:00:32 2012 +0200 +++ b/server/checkintegrity.py Fri Jul 06 09:00:33 2012 +0200 @@ -134,10 +134,12 @@ # attribute to their current value source = repo.system_source for eschema in etypes: - rset = session.execute('Any X WHERE X is %s' % eschema) - source.fti_index_entities(session, rset.entities()) - # clear entity cache to avoid high memory consumption on big tables - session.drop_entity_cache() + etype_class = session.vreg['etypes'].etype_class(str(eschema)) + for fti_rql in etype_class.cw_fti_index_rql_queries(session): + rset = session.execute(fti_rql) + source.fti_index_entities(session, rset.entities()) + # clear entity cache to avoid high memory consumption on big tables + session.drop_entity_cache() if withpb: pb.update()