# HG changeset patch # User Sylvain Thénault # Date 1267722363 -3600 # Node ID 23df4a120c96d3f5243c03e5cbe85cf9d598c3fe # Parent daa71eaf11e8d71570ded0b9c5565e5a19f819a1# Parent 5642bfa432369559261d1f29893d4965495ce873 backport stable diff -r daa71eaf11e8 -r 23df4a120c96 cwctl.py --- a/cwctl.py Thu Mar 04 17:51:19 2010 +0100 +++ b/cwctl.py Thu Mar 04 18:06:03 2010 +0100 @@ -204,7 +204,7 @@ # simplify constraints if versions: for constraint in versions: - op, ver = constraint.split() + op, ver = constraint if oper is None: oper = op version = ver @@ -238,7 +238,12 @@ for name, constraint in use.items(): self.constraints.setdefault(name,set()) if constraint: - self.constraints[name].add(constraint) + try: + oper, version = constraint.split() + self.constraints[name].add( (oper, version) ) + except: + self.warnings.append('cube %s depends on %s but constraint badly formatted: %s' + % (cube, name, constraint)) self.reverse_constraints.setdefault(name, set()).add(cube) class ListCommand(Command): diff -r daa71eaf11e8 -r 23df4a120c96 entity.py --- a/entity.py Thu Mar 04 17:51:19 2010 +0100 +++ b/entity.py Thu Mar 04 18:06:03 2010 +0100 @@ -876,7 +876,6 @@ _done.add(self.eid) containers = tuple(self.e_schema.fulltext_containers()) if containers: - yielded = False for rschema, target in containers: if target == 'object': targets = getattr(self, rschema.type) @@ -888,8 +887,6 @@ for container in entity.fti_containers(_done): yield container yielded = True - if not yielded: - yield self else: yield self @@ -919,7 +916,6 @@ continue if value: words += tokenize(value) - for rschema, role in self.e_schema.fulltext_relations(): if role == 'subject': for entity in getattr(self, rschema.type): diff -r daa71eaf11e8 -r 23df4a120c96 hooks/metadata.py --- a/hooks/metadata.py Thu Mar 04 17:51:19 2010 +0100 +++ b/hooks/metadata.py Thu Mar 04 18:06:03 2010 +0100 @@ -12,7 +12,6 @@ from cubicweb.selectors import implements from cubicweb.server import hook -from cubicweb.server.repository import FTIndexEntityOp def eschema_type_eid(session, etype): @@ -150,7 +149,8 @@ class UpdateFTIHook(MetaDataHook): - """sync fulltext index when relevant relation is added / removed + """sync fulltext index text index container when a relation with + fulltext_container set is added / removed """ __regid__ = 'updateftirel' events = ('after_add_relation', 'after_delete_relation') @@ -158,15 +158,19 @@ def __call__(self): rtype = self.rtype session = self._cw + ftcontainer = session.vreg.schema.rschema(rtype).fulltext_container if self.event == 'after_add_relation': - # Reindexing the contained entity is enough since it will implicitly - # reindex the container entity. - ftcontainer = session.vreg.schema.rschema(rtype).fulltext_container if ftcontainer == 'subject': - FTIndexEntityOp(session, entity=session.entity_from_eid(self.eidto)) + session.repo.system_source.index_entity( + session, session.entity_from_eid(self.eidfrom)) elif ftcontainer == 'object': - FTIndexEntityOp(session, entity=session.entity_from_eid(self.eidfrom)) - elif session.repo.schema.rschema(rtype).fulltext_container: - FTIndexEntityOp(session, entity=session.entity_from_eid(self.eidto)) - FTIndexEntityOp(session, entity=session.entity_from_eid(self.eidfrom)) + session.repo.system_source.index_entity( + session, session.entity_from_eid(self.eidto)) + # after delete relation + elif ftcontainer == 'subject': + session.repo.system_source.index_entity( + session, entity=session.entity_from_eid(self.eidfrom)) + elif ftcontainer == 'object': + session.repo.system_source.index_entity( + session, entity=session.entity_from_eid(self.eidto)) diff -r daa71eaf11e8 -r 23df4a120c96 hooks/syncschema.py --- a/hooks/syncschema.py Thu Mar 04 17:51:19 2010 +0100 +++ b/hooks/syncschema.py Thu Mar 04 18:06:03 2010 +0100 @@ -469,17 +469,18 @@ rschema = values = None # make pylint happy def precommit_event(self): + session = self.session etype = self.kobj[0] table = SQL_PREFIX + etype column = SQL_PREFIX + self.rschema.type if 'indexed' in self.values: - sysource = self.session.pool.source('system') + sysource = session.pool.source('system') if self.values['indexed']: - sysource.create_index(self.session, table, column) + sysource.create_index(session, table, column) else: - sysource.drop_index(self.session, table, column) + sysource.drop_index(session, table, column) if 'cardinality' in self.values and self.rschema.final: - adbh = self.session.pool.source('system').dbhelper + adbh = session.pool.source('system').dbhelper if not adbh.alter_column_support: # not supported (and NOT NULL not set by yams in that case, so # no worry) @@ -491,11 +492,17 @@ # XXX check self.values['cardinality'][0] actually changed? notnull = self.values['cardinality'][0] != '1' sql = adbh.sql_set_null_allowed(table, column, coltype, notnull) - self.session.system_sql(sql) + session.system_sql(sql) if 'fulltextindexed' in self.values: - UpdateFTIndexOp(self.session) - self.session.transaction_data.setdefault('fti_update_etypes', - set()).add(etype) + UpdateFTIndexOp(session) + session.transaction_data.setdefault( + 'fti_update_etypes', set()).add(etype) + elif 'fulltext_container' in self.values: + ftiupdates = session.transaction_data.setdefault( + 'fti_update_etypes', set()) + ftiupdates.add(etype) + ftiupdates.add(self.kobj[1]) + UpdateFTIndexOp(session) class SourceDbCWConstraintAdd(hook.Operation): @@ -1161,7 +1168,8 @@ source.fti_unindex_entity(session, entity.eid) for container in entity.fti_containers(): if still_fti or container is not entity: - session.repo.index_entity(session, container) + source.fti_unindex_entity(session, entity.eid) + source.fti_index_entity(session, container) except Exception: self.critical('Error while updating Full Text Index for' ' entity %s', entity.eid, exc_info=True) diff -r daa71eaf11e8 -r 23df4a120c96 schemas/workflow.py --- a/schemas/workflow.py Thu Mar 04 17:51:19 2010 +0100 +++ b/schemas/workflow.py Thu Mar 04 18:06:03 2010 +0100 @@ -19,7 +19,7 @@ name = String(required=True, indexed=True, internationalizable=True, maxsize=256) - description = RichString(fulltextindexed=True, default_format='text/rest', + description = RichString(default_format='text/rest', description=_('semantic description of this workflow')) workflow_of = SubjectRelation('CWEType', cardinality='+*', @@ -53,7 +53,7 @@ maxsize=256, constraints=[RQLUniqueConstraint('S name N, S state_of WF, Y state_of WF, Y name N', 'Y', _('workflow already have a state of that name'))]) - description = RichString(fulltextindexed=True, default_format='text/rest', + description = RichString(default_format='text/rest', description=_('semantic description of this state')) # XXX should be on BaseTransition w/ AND/OR selectors when we will @@ -77,8 +77,7 @@ constraints=[RQLUniqueConstraint('S name N, S transition_of WF, Y transition_of WF, Y name N', 'Y', _('workflow already have a transition of that name'))]) type = String(vocabulary=(_('normal'), _('auto')), default='normal') - description = RichString(fulltextindexed=True, - description=_('semantic description of this transition')) + description = RichString(description=_('semantic description of this transition')) condition = SubjectRelation('RQLExpression', cardinality='*?', composite='subject', description=_('a RQL expression which should return some results, ' 'else the transition won\'t be available. ' diff -r daa71eaf11e8 -r 23df4a120c96 server/checkintegrity.py --- a/server/checkintegrity.py Thu Mar 04 17:51:19 2010 +0100 +++ b/server/checkintegrity.py Thu Mar 04 18:06:03 2010 +0100 @@ -80,7 +80,7 @@ cursor.execute(indexer.sql_init_fti()) repo.config.disabled_hooks_categories.add('metadata') repo.config.disabled_hooks_categories.add('integrity') - repo.do_fti = True # ensure full-text indexation is activated + repo.system_source.do_fti = True # ensure full-text indexation is activated etypes = set() for eschema in schema.entities(): if eschema.final: diff -r daa71eaf11e8 -r 23df4a120c96 server/repository.py --- a/server/repository.py Thu Mar 04 17:51:19 2010 +0100 +++ b/server/repository.py Thu Mar 04 18:06:03 2010 +0100 @@ -71,27 +71,6 @@ pass -class FTIndexEntityOp(hook.LateOperation): - """operation to delay entity full text indexation to commit - - since fti indexing may trigger discovery of other entities, it should be - triggered on precommit, not commit, and this should be done after other - precommit operation which may add relations to the entity - """ - - def precommit_event(self): - session = self.session - entity = self.entity - if entity.eid in session.transaction_data.get('pendingeids', ()): - return # entity added and deleted in the same transaction - session.repo.system_source.fti_unindex_entity(session, entity.eid) - for container in entity.fti_containers(): - session.repo.index_entity(session, container) - - def commit_event(self): - pass - - def del_existing_rel_if_needed(session, eidfrom, rtype, eidto): """delete existing relation when adding a new one if card is 1 or ? @@ -133,6 +112,7 @@ if rset: safe_delete_relation(session, rschema, *rset[0]) + def safe_delete_relation(session, rschema, subject, object): if not rschema.has_perm(session, 'delete', fromeid=subject, toeid=object): raise Unauthorized() @@ -164,8 +144,6 @@ self.vreg.schema = self.schema # until actual schema is loaded... # querier helper, need to be created after sources initialization self.querier = querier.QuerierHelper(self, self.schema) - # should we reindex in changes? - self.do_fti = not config['delay-full-text-indexation'] # sources self.sources = [] self.sources_by_uri = {} @@ -778,7 +756,6 @@ # data sources handling ################################################### # * correspondance between eid and (type, source) # * correspondance between eid and local id (i.e. specific to a given source) - # * searchable text indexes def type_and_source_from_eid(self, eid, session=None): """return a tuple (type, source, extid) for the entity with id """ @@ -905,14 +882,9 @@ and index the entity with the full text index """ # begin by inserting eid/type/source/extid into the entities table - self.system_source.add_info(session, entity, source, extid) - if complete: - entity.complete(entity.e_schema.indexable_attributes()) new = session.transaction_data.setdefault('neweids', set()) new.add(entity.eid) - # now we can update the full text index - if self.do_fti: - FTIndexEntityOp(session, entity=entity) + self.system_source.add_info(session, entity, source, extid, complete) CleanupEidTypeCacheOp(session) def delete_info(self, session, eid): @@ -962,15 +934,6 @@ # he can delete all its relations without security checking session.unsafe_execute(rql, {'x': eid}, 'x', build_descr=False) - def index_entity(self, session, entity): - """full text index a modified entity""" - alreadydone = session.transaction_data.setdefault('indexedeids', set()) - if entity.eid in alreadydone: - self.debug('skipping reindexation of %s, already done', entity.eid) - return - alreadydone.add(entity.eid) - self.system_source.fti_index_entity(session, entity) - def locate_relation_source(self, session, subject, rtype, object): subjsource = self.source_from_eid(subject, session) objsource = self.source_from_eid(object, session) @@ -1106,14 +1069,10 @@ if not only_inline_rels: self.hm.call_hooks('before_update_entity', session, entity=entity) source.update_entity(session, entity) - if not only_inline_rels: - if need_fti_update and self.do_fti: - # reindex the entity only if this query is updating at least - # one indexable attribute - FTIndexEntityOp(session, entity=entity) - if source.should_call_hooks: + self.system_source.update_info(session, entity, need_fti_update) + if source.should_call_hooks: + if not only_inline_rels: self.hm.call_hooks('after_update_entity', session, entity=entity) - if source.should_call_hooks: for attr, value, prevvalue in relations: # if the relation is already cached, update existant cache relcache = entity.relation_cached(attr, 'subject') diff -r daa71eaf11e8 -r 23df4a120c96 server/schemaserial.py --- a/server/schemaserial.py Thu Mar 04 17:51:19 2010 +0100 +++ b/server/schemaserial.py Thu Mar 04 18:06:03 2010 +0100 @@ -228,7 +228,8 @@ else: pb = None groupmap = group_mapping(cursor, interactive=False) - # serialize all entity types, assuring CWEType is serialized first + # serialize all entity types, assuring CWEType is serialized first for proper + # is / is_instance_of insertion eschemas.remove(schema.eschema('CWEType')) eschemas.insert(0, schema.eschema('CWEType')) for eschema in eschemas: diff -r daa71eaf11e8 -r 23df4a120c96 server/sources/__init__.py --- a/server/sources/__init__.py Thu Mar 04 17:51:19 2010 +0100 +++ b/server/sources/__init__.py Thu Mar 04 18:06:03 2010 +0100 @@ -382,6 +382,22 @@ """ raise NotImplementedError() + def modified_entities(self, session, etypes, mtime): + """return a 2-uple: + * list of (etype, eid) of entities of the given types which have been + modified since the given timestamp (actually entities whose full text + index content has changed) + * list of (etype, eid) of entities of the given types which have been + deleted since the given timestamp + """ + raise NotImplementedError() + + def index_entity(self, session, entity): + """create an operation to [re]index textual content of the given entity + on commit + """ + raise NotImplementedError() + def fti_unindex_entity(self, session, eid): """remove text content for entity with the given eid from the full text index @@ -393,16 +409,6 @@ """ raise NotImplementedError() - def modified_entities(self, session, etypes, mtime): - """return a 2-uple: - * list of (etype, eid) of entities of the given types which have been - modified since the given timestamp (actually entities whose full text - index content has changed) - * list of (etype, eid) of entities of the given types which have been - deleted since the given timestamp - """ - raise NotImplementedError() - # sql system source interface ############################################# def sqlexec(self, session, sql, args=None): diff -r daa71eaf11e8 -r 23df4a120c96 server/sources/native.py --- a/server/sources/native.py Thu Mar 04 17:51:19 2010 +0100 +++ b/server/sources/native.py Thu Mar 04 18:06:03 2010 +0100 @@ -17,7 +17,9 @@ from datetime import datetime from base64 import b64decode, b64encode +from logilab.common.compat import any from logilab.common.cache import Cache +from logilab.common.decorators import cached, clear_cache from logilab.common.configuration import Method from logilab.common.adbh import get_adv_func_helper from logilab.common.shellutils import getlogin @@ -26,6 +28,7 @@ from cubicweb import UnknownEid, AuthenticationError, Binary, server from cubicweb.cwconfig import CubicWebNoAppConfiguration +from cubicweb.server import hook from cubicweb.server.utils import crypt_password from cubicweb.server.sqlutils import SQL_PREFIX, SQLAdapterMixIn from cubicweb.server.rqlannotation import set_qdata @@ -150,12 +153,14 @@ self._rql_sqlgen = self.sqlgen_class(appschema, self.dbhelper, self.encoding, ATTR_MAP.copy()) # full text index helper - self.indexer = get_indexer(self.dbdriver, self.encoding) - # advanced functionality helper - self.dbhelper.fti_uid_attr = self.indexer.uid_attr - self.dbhelper.fti_table = self.indexer.table - self.dbhelper.fti_restriction_sql = self.indexer.restriction_sql - self.dbhelper.fti_need_distinct_query = self.indexer.need_distinct + self.do_fti = not repo.config['delay-full-text-indexation'] + if self.do_fti: + self.indexer = get_indexer(self.dbdriver, self.encoding) + # XXX should go away with logilab.db + self.dbhelper.fti_uid_attr = self.indexer.uid_attr + self.dbhelper.fti_table = self.indexer.table + self.dbhelper.fti_restriction_sql = self.indexer.restriction_sql + self.dbhelper.fti_need_distinct_query = self.indexer.need_distinct # sql queries cache self._cache = Cache(repo.config['rql-cache-size']) self._temp_table_data = {} @@ -201,9 +206,10 @@ pool = self.repo._get_pool() pool.pool_set() # check full text index availibility - if not self.indexer.has_fti_table(pool['system']): - self.error('no text index table') - self.indexer = None + if self.do_fti: + if not self.indexer.has_fti_table(pool['system']): + self.critical('no text index table') + self.do_fti = False pool.pool_reset() self.repo._free_pool(pool) @@ -255,6 +261,7 @@ pass # __init__ for authentifier in self.authentifiers: authentifier.set_schema(self.schema) + clear_cache(self, 'need_fti_indexation') def support_entity(self, etype, write=False): """return true if the given entity's type is handled by this adapter @@ -524,7 +531,7 @@ finally: self._eid_creation_lock.release() - def add_info(self, session, entity, source, extid=None): + def add_info(self, session, entity, source, extid=None, complete=True): """add type and source info for an eid into the system table""" # begin by inserting eid/type/source/extid into the entities table if extid is not None: @@ -533,6 +540,20 @@ attrs = {'type': entity.__regid__, 'eid': entity.eid, 'extid': extid, 'source': source.uri, 'mtime': datetime.now()} session.system_sql(self.sqlgen.insert('entities', attrs), attrs) + # now we can update the full text index + if self.do_fti and self.need_fti_indexation(entity.__regid__): + if complete: + entity.complete(entity.e_schema.indexable_attributes()) + FTIndexEntityOp(session, entity=entity) + + def update_info(self, session, entity, need_fti_update): + if self.do_fti and need_fti_update: + # reindex the entity only if this query is updating at least + # one indexable attribute + FTIndexEntityOp(session, entity=entity) + # update entities.mtime + attrs = {'eid': entity.eid, 'mtime': datetime.now()} + session.system_sql(self.sqlgen.update('entities', attrs, ['eid']), attrs) def delete_info(self, session, eid, etype, uri, extid): """delete system information on deletion of an entity by transfering @@ -547,30 +568,6 @@ 'source': uri, 'dtime': datetime.now()} session.system_sql(self.sqlgen.insert('deleted_entities', attrs), attrs) - def fti_unindex_entity(self, session, eid): - """remove text content for entity with the given eid from the full text - index - """ - try: - self.indexer.cursor_unindex_object(eid, session.pool['system']) - except Exception: # let KeyboardInterrupt / SystemExit propagate - if self.indexer is not None: - self.exception('error while unindexing %s', eid) - - def fti_index_entity(self, session, entity): - """add text content of a created/modified entity to the full text index - """ - self.debug('reindexing %r', entity.eid) - try: - self.indexer.cursor_reindex_object(entity.eid, entity, - session.pool['system']) - except Exception: # let KeyboardInterrupt / SystemExit propagate - if self.indexer is not None: - self.exception('error while reindexing %s', entity) - # update entities.mtime - attrs = {'eid': entity.eid, 'mtime': datetime.now()} - session.system_sql(self.sqlgen.update('entities', attrs, ['eid']), attrs) - def modified_entities(self, session, etypes, mtime): """return a 2-uple: * list of (etype, eid) of entities of the given types which have been @@ -587,6 +584,71 @@ delentities = cursor.fetchall() return modentities, delentities + # full text index handling ################################################# + + @cached + def need_fti_indexation(self, etype): + eschema = self.schema.eschema(etype) + if any(eschema.indexable_attributes()): + return True + if any(eschema.fulltext_containers()): + return True + return False + + def index_entity(self, session, entity): + """create an operation to [re]index textual content of the given entity + on commit + """ + FTIndexEntityOp(session, entity=entity) + + def fti_unindex_entity(self, session, eid): + """remove text content for entity with the given eid from the full text + index + """ + try: + self.indexer.cursor_unindex_object(eid, session.pool['system']) + except Exception: # let KeyboardInterrupt / SystemExit propagate + self.exception('error while unindexing %s', eid) + + def fti_index_entity(self, session, entity): + """add text content of a created/modified entity to the full text index + """ + self.debug('reindexing %r', entity.eid) + try: + # use cursor_index_object, not cursor_reindex_object since + # unindexing done in the FTIndexEntityOp + self.indexer.cursor_index_object(entity.eid, entity, + session.pool['system']) + except Exception: # let KeyboardInterrupt / SystemExit propagate + self.exception('error while reindexing %s', entity) + + +class FTIndexEntityOp(hook.LateOperation): + """operation to delay entity full text indexation to commit + + since fti indexing may trigger discovery of other entities, it should be + triggered on precommit, not commit, and this should be done after other + precommit operation which may add relations to the entity + """ + + def precommit_event(self): + session = self.session + entity = self.entity + if entity.eid in session.transaction_data.get('pendingeids', ()): + return # entity added and deleted in the same transaction + alreadydone = session.transaction_data.setdefault('indexedeids', set()) + if entity.eid in alreadydone: + self.debug('skipping reindexation of %s, already done', entity.eid) + return + alreadydone.add(entity.eid) + source = session.repo.system_source + for container in entity.fti_containers(): + source.fti_unindex_entity(session, container.eid) + source.fti_index_entity(session, container) + + def commit_event(self): + pass + def sql_schema(driver): helper = get_adv_func_helper(driver) diff -r daa71eaf11e8 -r 23df4a120c96 server/test/unittest_repository.py --- a/server/test/unittest_repository.py Thu Mar 04 17:51:19 2010 +0100 +++ b/server/test/unittest_repository.py Thu Mar 04 18:06:03 2010 +0100 @@ -26,7 +26,7 @@ from cubicweb.devtools.repotest import tuplify from cubicweb.server import repository, hook from cubicweb.server.sqlutils import SQL_PREFIX - +from cubicweb.server.sources import native # start name server anyway, process will fail if already running os.system('pyro-ns >/dev/null 2>/dev/null &') @@ -430,25 +430,39 @@ self.assertEquals(modified, []) self.assertEquals(deleted, [('Personne', eidp)]) - def test_composite_entity(self): + def test_fulltext_container_entity(self): assert self.schema.rschema('use_email').fulltext_container == 'subject' - eid = self.request().create_entity('EmailAddress', address=u'toto@logilab.fr').eid + req = self.request() + toto = req.create_entity('EmailAddress', address=u'toto@logilab.fr') self.commit() - rset = self.execute('Any X WHERE X has_text %(t)s', {'t': 'toto'}) - self.assertEquals(rset.rows, [[eid]]) - self.execute('SET X use_email Y WHERE X login "admin", Y eid %(y)s', {'y': eid}) + rset = req.execute('Any X WHERE X has_text %(t)s', {'t': 'toto'}) + self.assertEquals(rset.rows, []) + req.user.set_relations(use_email=toto) + self.commit() + rset = req.execute('Any X WHERE X has_text %(t)s', {'t': 'toto'}) + self.assertEquals(rset.rows, [[req.user.eid]]) + req.execute('DELETE X use_email Y WHERE X login "admin", Y eid %(y)s', + {'y': toto.eid}) self.commit() - rset = self.execute('Any X WHERE X has_text %(t)s', {'t': 'toto'}) - self.assertEquals(rset.rows, [[self.session.user.eid]]) - self.execute('DELETE X use_email Y WHERE X login "admin", Y eid %(y)s', {'y': eid}) - self.commit() - rset = self.execute('Any X WHERE X has_text %(t)s', {'t': 'toto'}) + rset = req.execute('Any X WHERE X has_text %(t)s', {'t': 'toto'}) self.assertEquals(rset.rows, []) - eid = self.request().create_entity('EmailAddress', address=u'tutu@logilab.fr').eid - self.execute('SET X use_email Y WHERE X login "admin", Y eid %(y)s', {'y': eid}) + tutu = req.create_entity('EmailAddress', address=u'tutu@logilab.fr') + req.user.set_relations(use_email=tutu) + self.commit() + rset = req.execute('Any X WHERE X has_text %(t)s', {'t': 'tutu'}) + self.assertEquals(rset.rows, [[req.user.eid]]) + tutu.set_attributes(address=u'hip@logilab.fr') self.commit() - rset = self.execute('Any X WHERE X has_text %(t)s', {'t': 'tutu'}) - self.assertEquals(rset.rows, [[self.session.user.eid]]) + rset = req.execute('Any X WHERE X has_text %(t)s', {'t': 'tutu'}) + self.assertEquals(rset.rows, []) + rset = req.execute('Any X WHERE X has_text %(t)s', {'t': 'hip'}) + self.assertEquals(rset.rows, [[req.user.eid]]) + + def test_no_uncessary_ftiindex_op(self): + req = self.request() + req.create_entity('Workflow', name=u'dummy workflow', description=u'huuuuu') + self.failIf(any(x for x in self.session.pending_operations + if isinstance(x, native.FTIndexEntityOp))) class DBInitTC(CubicWebTC):