# HG changeset patch # User Sylvain Thénault # Date 1410878363 -7200 # Node ID e65873ad03714659da931bc594f954bb723dc9a2 # Parent 6c2d57d1b6de927f818aeb61dd8737801fbab5eb [CWEP002] Add support for computed attribute synchronization Related to #3546717. Test and handle the behaviour with several formulas and identified use cases. To do so add a birth year and a computed attribute age to Person in the test schema. diff -r 6c2d57d1b6de -r e65873ad0371 hooks/synccomputed.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hooks/synccomputed.py Tue Sep 16 16:39:23 2014 +0200 @@ -0,0 +1,227 @@ +# copyright 2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved. +# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr +# +# This file is part of CubicWeb. +# +# CubicWeb is free software: you can redistribute it and/or modify it under the +# terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 2.1 of the License, or (at your option) +# any later version. +# +# CubicWeb is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License along +# with CubicWeb. If not, see http://www.gnu.org/licenses/. 
+"""Hooks for synchronizing computed attributes""" + +__docformat__ = "restructuredtext en" +_ = unicode + +from collections import defaultdict + +from rql import nodes + +from cubicweb.server import hook + + +class RecomputeAttributeOperation(hook.DataOperationMixIn, hook.Operation): + """Operation to recompute caches of computed attribute at commit time, + depending on what has been modified in the transaction and avoiding + recomputing the same attribute twice + """ + containercls = dict + def add_data(self, computed_attribute, eid=None): + try: + self._container[computed_attribute].add(eid) + except KeyError: + self._container[computed_attribute] = set((eid,)) + + def precommit_event(self): + for computed_attribute_rdef, eids in self.get_data().iteritems(): + attr = computed_attribute_rdef.rtype + formula = computed_attribute_rdef.formula + rql = formula.replace('Any ', 'Any X, ', 1) + kwargs = None + # add constraint on X to the formula + if None in eids : # recompute for all etype if None is found + rql += ', X is %s' % computed_attribute_rdef.subject + elif len(eids) == 1: + rql += ', X eid %(x)s' + kwargs = {'x': eids.pop()} + else: + rql += ', X eid IN (%s)' % ', '.join((str(eid) for eid in eids)) + update_rql = 'SET X %s %%(value)s WHERE X eid %%(x)s' % attr + for eid, value in self.cnx.execute(rql, kwargs): + self.cnx.execute(update_rql, {'value': value, 'x': eid}) + + +class EntityWithCACreatedHook(hook.Hook): + """When creating an entity that has some computed attribute, those + attributes have to be computed. + + Concrete classes of this hook are generated at registration time by + introspecting the schema. 
+ """ + __abstract__ = True + events = ('after_add_entity',) + # list of computed attribute rdefs that have to be recomputed + computed_attributes = None + + def __call__(self): + for rdef in self.computed_attributes: + RecomputeAttributeOperation.get_instance(self._cw).add_data( + rdef, self.entity.eid) + + +class RelationInvolvedInCAModifiedHook(hook.Hook): + """When some relation used in a computed attribute is updated, those + attributes have to be recomputed. + + Concrete classes of this hook are generated at registration time by + introspecting the schema. + """ + __abstract__ = True + events = ('after_add_relation', 'before_delete_relation') + # list of (computed attribute rdef, optimize_on) that have to be recomputed + optimized_computed_attributes = None + + def __call__(self): + for rdef, optimize_on in self.optimized_computed_attributes: + if optimize_on is None: + eid = None + else: + eid = getattr(self, optimize_on) + RecomputeAttributeOperation.get_instance(self._cw).add_data(rdef, eid) + + +class AttributeInvolvedInCAModifiedHook(hook.Hook): + """When some attribute used in a computed attribute is updated, those + attributes have to be recomputed. + + Concrete classes of this hook are generated at registration time by + introspecting the schema. 
+ """ + __abstract__ = True + events = ('after_update_entity',) + # list of (computed attribute rdef, attributes of this entity type involved) + # that may have to be recomputed + attributes_computed_attributes = None + + def __call__(self): + edited_attributes = frozenset(self.entity.cw_edited) + for rdef, used_attributes in self.attributes_computed_attributes.iteritems(): + if edited_attributes.intersection(used_attributes): + # XXX optimize if the modified attributes belong to the same + # entity as the computed attribute + RecomputeAttributeOperation.get_instance(self._cw).add_data(rdef) + + +# code generation at registration time ######################################### + +def _optimize_on(formula_select, rtype): + """Given a formula and some rtype, tells whether on update of the given + relation, formula may be recomputed only for the relation's subject + ('eidfrom' returned), object ('eidto' returned) or None. + + Optimizing is only possible when X is used as direct subject/object of this + relation, else we may miss some necessary update. + """ + for rel in formula_select.get_nodes(nodes.Relation): + if rel.r_type == rtype: + sub = rel.get_variable_parts()[0] + obj = rel.get_variable_parts()[1] + if sub.name == 'X': + return 'eidfrom' + elif obj.name == 'X': + return 'eidto' + else: + return None + + +class _FormulaDependenciesMatrix(object): + """This class computes and represents the dependencies of computed attributes + towards relations and attributes + """ + + def __init__(self, schema): + """Analyzes the schema to compute the dependencies""" + # entity types holding some computed attribute {etype: [computed rdefs]} + self.computed_attribute_by_etype = defaultdict(list) + # depending entity types {dep. etype: {computed rdef: dep. etype attributes}} + self.computed_attribute_by_etype_attrs = defaultdict(lambda: defaultdict(set)) + # depending relations def {dep. 
rdef: [computed rdefs] + self.computed_attribute_by_relation = defaultdict(list) # by rdef + # Walk through all attributes definitions + for rdef in schema.iter_computed_attributes(): + self.computed_attribute_by_etype[rdef.subject.type].append(rdef) + # extract the relations it depends upon - `rdef.formula_select` is + # expected to have been set by finalize_computed_attributes + select = rdef.formula_select + for rel_node in select.get_nodes(nodes.Relation): + rschema = schema.rschema(rel_node.r_type) + lhs, rhs = rel_node.get_variable_parts() + for sol in select.solutions: + subject_etype = sol[lhs.name] + if isinstance(rhs, nodes.VariableRef): + object_etypes = set(sol[rhs.name] for sol in select.solutions) + else: + object_etypes = rschema.objects(subject_etype) + for object_etype in object_etypes: + if rschema.final: + attr_for_computations = self.computed_attribute_by_etype_attrs[subject_etype] + attr_for_computations[rdef].add(rschema.type) + else: + depend_on_rdef = rschema.rdefs[subject_etype, object_etype] + self.computed_attribute_by_relation[depend_on_rdef].append(rdef) + + def generate_entity_creation_hooks(self): + for etype, computed_attributes in self.computed_attribute_by_etype.iteritems(): + regid = 'computed_attribute.%s_created' % etype + selector = hook.is_instance(etype) + yield type('%sCreatedHook' % etype, + (EntityWithCACreatedHook,), + {'__regid__': regid, + '__select__': hook.Hook.__select__ & selector, + 'computed_attributes': computed_attributes}) + + def generate_relation_change_hooks(self): + for rdef, computed_attributes in self.computed_attribute_by_relation.iteritems(): + regid = 'computed_attribute.%s_modified' % rdef.rtype + selector = hook.match_rtype(rdef.rtype.type, + frometypes=(rdef.subject.type,), + toetypes=(rdef.object.type,)) + optimized_computed_attributes = [] + for computed_rdef in computed_attributes: + optimized_computed_attributes.append( + (computed_rdef, + _optimize_on(computed_rdef.formula_select, rdef.rtype)) 
+ ) + yield type('%sModifiedHook' % rdef.rtype, + (RelationInvolvedInCAModifiedHook,), + {'__regid__': regid, + '__select__': hook.Hook.__select__ & selector, + 'optimized_computed_attributes': optimized_computed_attributes}) + + def generate_entity_update_hooks(self): + for etype, attributes_computed_attributes in self.computed_attribute_by_etype_attrs.iteritems(): + regid = 'computed_attribute.%s_updated' % etype + selector = hook.is_instance(etype) + yield type('%sModifiedHook' % etype, + (AttributeInvolvedInCAModifiedHook,), + {'__regid__': regid, + '__select__': hook.Hook.__select__ & selector, + 'attributes_computed_attributes': attributes_computed_attributes}) + + +def registration_callback(vreg): + vreg.register_all(globals().values(), __name__) + dependencies = _FormulaDependenciesMatrix(vreg.schema) + for hook_class in dependencies.generate_entity_creation_hooks(): + vreg.register(hook_class) + for hook_class in dependencies.generate_relation_change_hooks(): + vreg.register(hook_class) + for hook_class in dependencies.generate_entity_update_hooks(): + vreg.register(hook_class) diff -r 6c2d57d1b6de -r e65873ad0371 hooks/test/data-computed/schema.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hooks/test/data-computed/schema.py Tue Sep 16 16:39:23 2014 +0200 @@ -0,0 +1,31 @@ +# copyright 2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved. +# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr +# +# This file is part of CubicWeb. +# +# CubicWeb is free software: you can redistribute it and/or modify it under the +# terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 2.1 of the License, or (at your option) +# any later version. +# +# CubicWeb is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. 
+# +# You should have received a copy of the GNU Lesser General Public License along +# with CubicWeb. If not, see http://www.gnu.org/licenses/. +from yams.buildobjs import EntityType, String, Int, SubjectRelation + +THISYEAR = 2014 + +class Person(EntityType): + name = String() + salaire = Int() + birth_year = Int(required=True) + travaille = SubjectRelation('Societe') + age = Int(formula='Any %d - D WHERE X birth_year D' % THISYEAR) + +class Societe(EntityType): + nom = String() + salaire_total = Int(formula='Any SUM(SA) GROUPBY X WHERE P travaille X, P salaire SA') diff -r 6c2d57d1b6de -r e65873ad0371 hooks/test/unittest_synccomputed.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/hooks/test/unittest_synccomputed.py Tue Sep 16 16:39:23 2014 +0200 @@ -0,0 +1,139 @@ +# copyright 2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved. +# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr +# +# This file is part of CubicWeb. +# +# CubicWeb is free software: you can redistribute it and/or modify it under the +# terms of the GNU Lesser General Public License as published by the Free +# Software Foundation, either version 2.1 of the License, or (at your option) +# any later version. +# +# CubicWeb is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more +# details. +# +# You should have received a copy of the GNU Lesser General Public License along +# with CubicWeb. If not, see http://www.gnu.org/licenses/. 
+"""unit tests for computed attributes/relations hooks""" + +from unittest import TestCase + +from yams.buildobjs import EntityType, String, Int, SubjectRelation + +from cubicweb.devtools.testlib import CubicWebTC +from cubicweb.schema import build_schema_from_namespace + + +class FormulaDependenciesMatrixTC(TestCase): + + def simple_schema(self): + THISYEAR = 2014 + + class Person(EntityType): + name = String() + salary = Int() + birth_year = Int(required=True) + works_for = SubjectRelation('Company') + age = Int(formula='Any %d - D WHERE X birth_year D' % THISYEAR) + + class Company(EntityType): + name = String() + total_salary = Int(formula='Any SUM(SA) GROUPBY X WHERE P works_for X, P salary SA') + + schema = build_schema_from_namespace(vars().items()) + return schema + + def setUp(self): + from cubicweb.hooks.synccomputed import _FormulaDependenciesMatrix + self.schema = self.simple_schema() + self.dependencies = _FormulaDependenciesMatrix(self.schema) + + def test_computed_attributes_by_etype(self): + comp_by_etype = self.dependencies.computed_attribute_by_etype + self.assertEqual(len(comp_by_etype), 2) + values = comp_by_etype['Person'] + self.assertEqual(len(values), 1) + self.assertEqual(values[0].rtype, 'age') + values = comp_by_etype['Company'] + self.assertEqual(len(values), 1) + self.assertEqual(values[0].rtype, 'total_salary') + + def test_computed_attribute_by_relation(self): + comp_by_rdef = self.dependencies.computed_attribute_by_relation + self.assertEqual(len(comp_by_rdef), 1) + key, values = iter(comp_by_rdef.iteritems()).next() + self.assertEqual(key.rtype, 'works_for') + self.assertEqual(len(values), 1) + self.assertEqual(values[0].rtype, 'total_salary') + + def test_computed_attribute_by_etype_attrs(self): + comp_by_attr = self.dependencies.computed_attribute_by_etype_attrs + self.assertEqual(len(comp_by_attr), 1) + values = comp_by_attr['Person'] + self.assertEqual(len(values), 2) + values = set((rdef.formula, tuple(v)) + for rdef, v in 
values.iteritems()) + self.assertEquals(values, + set((('Any 2014 - D WHERE X birth_year D', tuple(('birth_year',))), + ('Any SUM(SA) GROUPBY X WHERE P works_for X, P salary SA', tuple(('salary',))))) + ) + + +class ComputedAttributeTC(CubicWebTC): + appid = 'data-computed' + + def setup_entities(self, req): + self.societe = req.create_entity('Societe', nom=u'Foo') + req.create_entity('Person', name=u'Titi', salaire=1000, + travaille=self.societe, birth_year=2001) + self.tata = req.create_entity('Person', name=u'Tata', salaire=2000, + travaille=self.societe, birth_year=1990) + + + def test_update_on_add_remove_relation(self): + """check the rewriting of a computed attribute""" + with self.admin_access.web_request() as req: + self.setup_entities(req) + req.cnx.commit() + rset = req.execute('Any S WHERE X salaire_total S, X nom "Foo"') + self.assertEqual(rset[0][0], 3000) + # Add relation. + toto = req.create_entity('Person', name=u'Toto', salaire=1500, + travaille=self.societe, birth_year=1988) + req.cnx.commit() + rset = req.execute('Any S WHERE X salaire_total S, X nom "Foo"') + self.assertEqual(rset[0][0], 4500) + # Delete relation. + toto.cw_set(travaille=None) + req.cnx.commit() + rset = req.execute('Any S WHERE X salaire_total S, X nom "Foo"') + self.assertEqual(rset[0][0], 3000) + + def test_recompute_on_attribute_update(self): + """check the modification of an attribute triggers the update of the + computed attributes that depend on it""" + with self.admin_access.web_request() as req: + self.setup_entities(req) + req.cnx.commit() + rset = req.execute('Any S WHERE X salaire_total S, X nom "Foo"') + self.assertEqual(rset[0][0], 3000) + # Update attribute. 
+ self.tata.cw_set(salaire=1000) + req.cnx.commit() + rset = req.execute('Any S WHERE X salaire_total S, X nom "Foo"') + self.assertEqual(rset[0][0], 2000) + + def test_init_on_entity_creation(self): + """check the computed attribute is initialized on entity creation""" + with self.admin_access.web_request() as req: + p = req.create_entity('Person', name=u'Tata', salaire=2000, + birth_year=1990) + req.cnx.commit() + rset = req.execute('Any A, X WHERE X age A, X name "Tata"') + self.assertEqual(rset[0][0], 2014 - 1990) + + +if __name__ == '__main__': + from logilab.common.testlib import unittest_main + unittest_main() diff -r 6c2d57d1b6de -r e65873ad0371 schema.py --- a/schema.py Mon Apr 28 14:11:23 2014 +0200 +++ b/schema.py Tue Sep 16 16:39:23 2014 +0200 @@ -144,6 +144,44 @@ return u', '.join(' '.join(expr.split()) for expr in rqlstring.split(',')) +def _check_valid_formula(rdef, formula_rqlst): + """Check the formula is a valid RQL query with some restriction (no union, + single selected node, etc.), raise BadSchemaDefinition if not + """ + if len(formula_rqlst.children) != 1: + raise BadSchemaDefinition('computed attribute %(attr)s on %(etype)s: ' + 'can not use UNION in formula %(form)r' % + {'attr' : rdef.rtype, + 'etype' : rdef.subject.type, + 'form' : rdef.formula}) + select = formula_rqlst.children[0] + if len(select.selection) != 1: + raise BadSchemaDefinition('computed attribute %(attr)s on %(etype)s: ' + 'can only select one term in formula %(form)r' % + {'attr' : rdef.rtype, + 'etype' : rdef.subject.type, + 'form' : rdef.formula}) + term = select.selection[0] + types = set(term.get_type(sol) for sol in select.solutions) + if len(types) != 1: + raise BadSchemaDefinition('computed attribute %(attr)s on %(etype)s: ' + 'multiple possible types (%(types)s) for formula %(form)r' % + {'attr' : rdef.rtype, + 'etype' : rdef.subject.type, + 'types' : list(types), + 'form' : rdef.formula}) + computed_type = types.pop() + expected_type = rdef.object.type + if 
computed_type != expected_type: + raise BadSchemaDefinition('computed attribute %(attr)s on %(etype)s: ' + 'computed attribute type (%(comp_type)s) mismatch with ' + 'specified type (%(attr_type)s)' % + {'attr' : rdef.rtype, + 'etype' : rdef.subject.type, + 'comp_type' : computed_type, + 'attr_type' : expected_type}) + + class RQLExpression(object): """Base class for RQL expression used in schema (constraints and permissions) @@ -1010,6 +1048,12 @@ def schema_by_eid(self, eid): return self._eid_index[eid] + def iter_computed_attributes(self): + for relation in self.relations(): + for rdef in relation.rdefs.itervalues(): + if rdef.final and rdef.formula is not None: + yield rdef + def iter_computed_relations(self): for relation in self.relations(): if relation.rule: @@ -1021,51 +1065,17 @@ self.finalize_computed_relations() def finalize_computed_attributes(self): - """Check consistency of computed attributes types""" + """Check computed attributes validity (if any), else raise + `BadSchemaDefinition` + """ analyzer = ETypeResolver(self) - for relation in self.relations(): - for rdef in relation.rdefs.itervalues(): - if rdef.final and rdef.formula is not None: - computed_etype = rdef.subject.type - computed_attr = rdef.rtype - rqlst = parse(rdef.formula) - if len(rqlst.children) != 1: - raise BadSchemaDefinition( - 'computed attribute %(attr)s on %(etype)s: ' - 'can not use UNION in formula %(form)r' % - dict(attr=computed_attr, - etype=computed_etype, - form=rdef.formula)) - select = rqlst.children[0] - analyzer.visit(select) - if len(select.selection) != 1: - raise BadSchemaDefinition( - 'computed attribute %(attr)s on %(etype)s: ' - 'can only select one term in formula %(form)r' % - dict(attr=computed_attr, - etype=computed_etype, - form=rdef.formula)) - term = select.selection[0] - types = set(term.get_type(sol) for sol in select.solutions) - if len(types) != 1: - raise BadSchemaDefinition( - 'computed attribute %(attr)s on %(etype)s: ' - 'multiple possible types 
(%(types)s) for formula %(form)s' % - dict(attr=computed_attr, - etype=computed_etype, - types=list(types), - form=rdef.formula)) - computed_type = types.pop() - expected_type = rdef.object.type - if computed_type != expected_type: - raise BadSchemaDefinition( - 'computed attribute %(attr)s on %(etype)s: ' - 'computed attribute type (%(comp_type)s) mismatch with ' - 'specified type (%(attr_type)s)' % - dict(attr=computed_attr, - etype=computed_etype, - comp_type=computed_type, - attr_type=expected_type)) + for rdef in self.iter_computed_attributes(): + rqlst = parse(rdef.formula) + select = rqlst.children[0] + analyzer.visit(select) + _check_valid_formula(rdef, rqlst) + rdef.formula_select = select # avoid later recomputation + def finalize_computed_relations(self): """Build relation definitions for computed relations @@ -1353,6 +1363,7 @@ # only defining here to prevent pylint from complaining info = warning = error = critical = exception = debug = lambda msg,*a,**kw: None + set_log_methods(CubicWebSchemaLoader, getLogger('cubicweb.schemaloader')) set_log_methods(BootstrapSchemaLoader, getLogger('cubicweb.bootstrapschemaloader')) set_log_methods(RQLExpression, getLogger('cubicweb.schema')) diff -r 6c2d57d1b6de -r e65873ad0371 test/unittest_schema.py --- a/test/unittest_schema.py Mon Apr 28 14:11:23 2014 +0200 +++ b/test/unittest_schema.py Tue Sep 16 16:39:23 2014 +0200 @@ -539,5 +539,6 @@ sorted([(r.rtype.type, r.subject.type, r.object.type, role) for r, role in sorted(schema[etype].composite_rdef_roles)]) + if __name__ == '__main__': unittest_main()