[CWEP002] Add support for computed attribute synchronization
author Sylvain Thénault
Tue, 16 Sep 2014 16:39:23 +0200 (2014-09-16)
changeset 9967 e65873ad0371
parent 9966 6c2d57d1b6de
child 9968 50f046bf0e50
[CWEP002] Add support for computed attribute synchronization. Related to #3546717. Test and handle the behaviour with several formulas and identified use cases. To do so, add a birth year and a computed attribute `age` to Person in the test schema.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hooks/synccomputed.py	Tue Sep 16 16:39:23 2014 +0200
@@ -0,0 +1,227 @@
+# copyright 2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
+# This file is part of CubicWeb.
+# CubicWeb is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 2.1 of the License, or (at your option)
+# any later version.
+# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+# details.
+# You should have received a copy of the GNU Lesser General Public License along
+# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
+"""Hooks for synchronizing computed attributes"""
+__docformat__ = "restructuredtext en"
+_ = unicode
+from collections import defaultdict
+from rql import nodes
+from cubicweb.server import hook
+class RecomputeAttributeOperation(hook.DataOperationMixIn, hook.Operation):
+    """Operation recomputing the stored value of computed attributes at
+    commit time.
+    Hooks record, for each computed attribute definition, the eids of the
+    entities to refresh. Eids are kept in sets, so the same attribute is not
+    recomputed twice for the same entity.
+    """
+    # data layout: {computed attribute rdef: set of eids}, where a None eid
+    # means "every entity of the subject type"
+    containercls = dict
+    def add_data(self, computed_attribute, eid=None):
+        """Record that `computed_attribute` has to be recomputed for `eid`.
+        When `eid` is None, the attribute will be recomputed for every entity
+        of its subject type.
+        """
+        self._container.setdefault(computed_attribute, set()).add(eid)
+    def precommit_event(self):
+        """Execute the formula of each recorded computed attribute and store
+        the resulting values.
+        """
+        for computed_attribute_rdef, eids in self.get_data().iteritems():
+            attr = computed_attribute_rdef.rtype
+            # also select X so each computed value comes with its entity's eid
+            rql = computed_attribute_rdef.formula.replace('Any ', 'Any X, ', 1)
+            kwargs = None
+            # add constraint on X to the formula
+            if None in eids:  # recompute for all etype if None is found
+                rql += ', X is %s' % computed_attribute_rdef.subject
+            elif len(eids) == 1:
+                rql += ', X eid %(x)s'
+                # read the single eid without popping it, so the recorded
+                # data is left untouched
+                kwargs = {'x': next(iter(eids))}
+            else:
+                rql += ', X eid IN (%s)' % ', '.join(str(eid) for eid in eids)
+            update_rql = 'SET X %s %%(value)s WHERE X eid %%(x)s' % attr
+            for eid, value in self.cnx.execute(rql, kwargs):
+                self.cnx.execute(update_rql, {'value': value, 'x': eid})
+class EntityWithCACreatedHook(hook.Hook):
+    """Schedule the computation of the computed attributes of an entity that
+    has just been created.
+    This class is abstract: concrete subclasses are generated at registration
+    time by introspecting the schema.
+    """
+    __abstract__ = True
+    events = ('after_add_entity',)
+    # computed attribute rdefs that have to be recomputed for the new entity
+    computed_attributes = None
+    def __call__(self):
+        for computed_rdef in self.computed_attributes:
+            operation = RecomputeAttributeOperation.get_instance(self._cw)
+            operation.add_data(computed_rdef, self.entity.eid)
+class RelationInvolvedInCAModifiedHook(hook.Hook):
+    """Schedule the recomputation of the computed attributes whose formula
+    uses a relation that is being added or deleted.
+    This class is abstract: concrete subclasses are generated at registration
+    time by introspecting the schema.
+    """
+    __abstract__ = True
+    events = ('after_add_relation', 'before_delete_relation')
+    # list of (computed attribute rdef, optimize_on) pairs to recompute, where
+    # optimize_on is None or the name of the hook attribute holding the eid
+    optimized_computed_attributes = None
+    def __call__(self):
+        for computed_rdef, eid_attribute in self.optimized_computed_attributes:
+            # a None eid is recorded when no optimization is available
+            target_eid = None if eid_attribute is None else getattr(self, eid_attribute)
+            operation = RecomputeAttributeOperation.get_instance(self._cw)
+            operation.add_data(computed_rdef, target_eid)
+class AttributeInvolvedInCAModifiedHook(hook.Hook):
+    """Schedule the recomputation of the computed attributes whose formula
+    uses an attribute that has just been updated.
+    This class is abstract: concrete subclasses are generated at registration
+    time by introspecting the schema.
+    """
+    __abstract__ = True
+    events = ('after_update_entity',)
+    # {computed attribute rdef: attributes of this entity type it depends on}
+    attributes_computed_attributes = None
+    def __call__(self):
+        modified = frozenset(self.entity.cw_edited)
+        for computed_rdef, dependencies in self.attributes_computed_attributes.iteritems():
+            if modified.isdisjoint(dependencies):
+                # none of the attributes used by the formula was edited
+                continue
+            # XXX optimize if the modified attributes belong to the same
+            # entity as the computed attribute
+            RecomputeAttributeOperation.get_instance(self._cw).add_data(computed_rdef)
+# code generation at registration time #########################################
+def _optimize_on(formula_select, rtype):
+    """Given a formula and some rtype, tell whether on update of the given
+    relation, the formula may be recomputed only for the relation's subject
+    ('eidfrom' returned), object ('eidto' returned) or must be recomputed for
+    every entity (None returned).
+    Optimizing is only possible when X is used as direct subject/object of
+    *every* occurrence of this relation in the formula, and always on the same
+    side, else we may miss some necessary update. None is also returned when
+    the relation is not used in the formula.
+    """
+    roles = set()
+    for rel in formula_select.get_nodes(nodes.Relation):
+        if rel.r_type != rtype:
+            continue
+        sub, obj = rel.get_variable_parts()
+        # only variable references are compared by name: the right-hand side
+        # may be another kind of node
+        if isinstance(sub, nodes.VariableRef) and sub.name == 'X':
+            roles.add('eidfrom')
+        elif isinstance(obj, nodes.VariableRef) and obj.name == 'X':
+            roles.add('eidto')
+        else:
+            # X is not directly involved in this occurrence of the relation
+            return None
+    if len(roles) == 1:
+        return roles.pop()
+    # relation not used, or X used on both sides across occurrences
+    return None
+class _FormulaDependenciesMatrix(object):
+    """Compute and hold the dependencies of computed attributes towards the
+    relations and attributes used by their formula.
+    The `generate_*_hooks` methods turn those dependencies into concrete hook
+    classes.
+    """
+    def __init__(self, schema):
+        """Analyze `schema` to compute the dependencies.
+        The `formula_select` attribute of each computed attribute definition
+        is expected to have been set by finalize_computed_attributes.
+        """
+        # entity types holding some computed attribute {etype: [computed rdefs]}
+        self.computed_attribute_by_etype = defaultdict(list)
+        # depending entity types {dep. etype: {computed rdef: dep. etype attributes}}
+        self.computed_attribute_by_etype_attrs = defaultdict(lambda: defaultdict(set))
+        # depending relations def {dep. rdef: [computed rdefs]}
+        self.computed_attribute_by_relation = defaultdict(list) # by rdef
+        # Walk through all attributes definitions
+        for rdef in schema.iter_computed_attributes():
+            self.computed_attribute_by_etype[rdef.subject.type].append(rdef)
+            # extract the relations the formula depends upon
+            select = rdef.formula_select
+            for rel_node in select.get_nodes(nodes.Relation):
+                rschema = schema.rschema(rel_node.r_type)
+                lhs, rhs = rel_node.get_variable_parts()
+                for sol in select.solutions:
+                    subject_etype = sol[lhs.name]
+                    if rschema.final:
+                        # attribute dependency: only the subject type matters
+                        attr_for_computations = self.computed_attribute_by_etype_attrs[subject_etype]
+                        attr_for_computations[rdef].add(rschema.type)
+                        continue
+                    if isinstance(rhs, nodes.VariableRef):
+                        # take the object type from the *same* solution as the
+                        # subject type, so only consistent pairs are looked up
+                        object_etypes = (sol[rhs.name],)
+                    else:
+                        object_etypes = rschema.objects(subject_etype)
+                    for object_etype in object_etypes:
+                        depend_on_rdef = rschema.rdefs[subject_etype, object_etype]
+                        dependents = self.computed_attribute_by_relation[depend_on_rdef]
+                        # the relation may occur several times in a formula
+                        if rdef not in dependents:
+                            dependents.append(rdef)
+    def generate_entity_creation_hooks(self):
+        """Yield one EntityWithCACreatedHook subclass per entity type holding
+        some computed attribute.
+        """
+        for etype, computed_attributes in self.computed_attribute_by_etype.iteritems():
+            regid = 'computed_attribute.%s_created' % etype
+            selector = hook.is_instance(etype)
+            yield type('%sCreatedHook' % etype,
+                       (EntityWithCACreatedHook,),
+                       {'__regid__': regid,
+                        '__select__':  hook.Hook.__select__ & selector,
+                        'computed_attributes': computed_attributes})
+    def generate_relation_change_hooks(self):
+        """Yield one RelationInvolvedInCAModifiedHook subclass per relation
+        definition used by some computed attribute formula.
+        """
+        for rdef, computed_attributes in self.computed_attribute_by_relation.iteritems():
+            regid = 'computed_attribute.%s_modified' % rdef.rtype
+            selector = hook.match_rtype(rdef.rtype.type,
+                                        frometypes=(rdef.subject.type,),
+                                        toetypes=(rdef.object.type,))
+            # pair each computed rdef with the side of the relation (if any)
+            # on which its recomputation may be restricted
+            optimized_computed_attributes = [
+                (computed_rdef,
+                 _optimize_on(computed_rdef.formula_select, rdef.rtype))
+                for computed_rdef in computed_attributes]
+            yield type('%sModifiedHook' % rdef.rtype,
+                       (RelationInvolvedInCAModifiedHook,),
+                       {'__regid__': regid,
+                        '__select__':  hook.Hook.__select__ & selector,
+                        'optimized_computed_attributes': optimized_computed_attributes})
+    def generate_entity_update_hooks(self):
+        """Yield one AttributeInvolvedInCAModifiedHook subclass per entity
+        type holding attributes used by some computed attribute formula.
+        """
+        for etype, attributes_computed_attributes in self.computed_attribute_by_etype_attrs.iteritems():
+            regid = 'computed_attribute.%s_updated' % etype
+            selector = hook.is_instance(etype)
+            yield type('%sModifiedHook' % etype,
+                       (AttributeInvolvedInCAModifiedHook,),
+                       {'__regid__': regid,
+                        '__select__':  hook.Hook.__select__ & selector,
+                        'attributes_computed_attributes': attributes_computed_attributes})
+def registration_callback(vreg):
+    """Register this module's objects, then the hook classes generated from
+    the computed attributes found in the schema of `vreg`.
+    """
+    vreg.register_all(globals().values(), __name__)
+    dependencies = _FormulaDependenciesMatrix(vreg.schema)
+    # same registration order as the generators are listed here
+    hook_generators = (dependencies.generate_entity_creation_hooks,
+                       dependencies.generate_relation_change_hooks,
+                       dependencies.generate_entity_update_hooks)
+    for generate_hooks in hook_generators:
+        for hook_class in generate_hooks():
+            vreg.register(hook_class)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hooks/test/data-computed/schema.py	Tue Sep 16 16:39:23 2014 +0200
@@ -0,0 +1,31 @@
+# copyright 2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
+# This file is part of CubicWeb.
+# CubicWeb is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 2.1 of the License, or (at your option)
+# any later version.
+# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+# details.
+# You should have received a copy of the GNU Lesser General Public License along
+# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
+from yams.buildobjs import EntityType, String, Int, SubjectRelation
+# reference year subtracted from in the `age` formula of Person
+THISYEAR = 2014
+# test entity type holding a computed attribute (`age`) that depends on one
+# of its own attributes
+class Person(EntityType):
+    name = String()
+    salaire = Int()
+    birth_year = Int(required=True)
+    travaille = SubjectRelation('Societe')
+    # computed attribute: THISYEAR minus the person's birth year
+    age = Int(formula='Any %d - D WHERE X birth_year D' % THISYEAR)
+# test entity type holding a computed attribute (`salaire_total`) that
+# depends on a relation and on an attribute of another entity type
+class Societe(EntityType):
+    nom = String()
+    # computed attribute: sum of the `salaire` of the persons linked to the
+    # company through `travaille`
+    salaire_total = Int(formula='Any SUM(SA) GROUPBY X WHERE P travaille X, P salaire SA')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hooks/test/unittest_synccomputed.py	Tue Sep 16 16:39:23 2014 +0200
@@ -0,0 +1,139 @@
+# copyright 2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
+# This file is part of CubicWeb.
+# CubicWeb is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 2.1 of the License, or (at your option)
+# any later version.
+# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
+# details.
+# You should have received a copy of the GNU Lesser General Public License along
+# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
+"""unit tests for computed attributes/relations hooks"""
+from unittest import TestCase
+from yams.buildobjs import EntityType, String, Int, SubjectRelation
+from cubicweb.devtools.testlib import CubicWebTC
+from cubicweb.schema import build_schema_from_namespace
+class FormulaDependenciesMatrixTC(TestCase):
+    """Check the dependencies computed by `_FormulaDependenciesMatrix` on a
+    small schema built in memory.
+    """
+    def simple_schema(self):
+        """Return a schema with two computed attributes: `Person.age` and
+        `Company.total_salary`.
+        """
+        THISYEAR = 2014
+        class Person(EntityType):
+            name = String()
+            salary = Int()
+            birth_year = Int(required=True)
+            works_for = SubjectRelation('Company')
+            age = Int(formula='Any %d - D WHERE X birth_year D' % THISYEAR)
+        class Company(EntityType):
+            name = String()
+            total_salary = Int(formula='Any SUM(SA) GROUPBY X WHERE P works_for X, P salary SA')
+        schema = build_schema_from_namespace(vars().items())
+        return schema
+    def setUp(self):
+        from cubicweb.hooks.synccomputed import _FormulaDependenciesMatrix
+        self.schema = self.simple_schema()
+        self.dependencies = _FormulaDependenciesMatrix(self.schema)
+    def test_computed_attributes_by_etype(self):
+        comp_by_etype = self.dependencies.computed_attribute_by_etype
+        self.assertEqual(len(comp_by_etype), 2)
+        values = comp_by_etype['Person']
+        self.assertEqual(len(values), 1)
+        self.assertEqual(values[0].rtype, 'age')
+        values = comp_by_etype['Company']
+        self.assertEqual(len(values), 1)
+        self.assertEqual(values[0].rtype, 'total_salary')
+    def test_computed_attribute_by_relation(self):
+        comp_by_rdef = self.dependencies.computed_attribute_by_relation
+        self.assertEqual(len(comp_by_rdef), 1)
+        # iteritems() already returns an iterator
+        key, values = next(comp_by_rdef.iteritems())
+        self.assertEqual(key.rtype, 'works_for')
+        self.assertEqual(len(values), 1)
+        self.assertEqual(values[0].rtype, 'total_salary')
+    def test_computed_attribute_by_etype_attrs(self):
+        comp_by_attr = self.dependencies.computed_attribute_by_etype_attrs
+        self.assertEqual(len(comp_by_attr), 1)
+        values = comp_by_attr['Person']
+        self.assertEqual(len(values), 2)
+        # compare as a set of (formula, used attributes) pairs
+        values = set((rdef.formula, tuple(v))
+                     for rdef, v in values.iteritems())
+        self.assertEqual(values,
+                         set([('Any 2014 - D WHERE X birth_year D', ('birth_year',)),
+                              ('Any SUM(SA) GROUPBY X WHERE P works_for X, P salary SA', ('salary',))]))
+class ComputedAttributeTC(CubicWebTC):
+    """Check computed attributes are initialized and kept up to date, using
+    the `data-computed` test application.
+    """
+    appid = 'data-computed'
+    def setup_entities(self, req):
+        """Create the company "Foo" and two persons working for it."""
+        self.societe = req.create_entity('Societe', nom=u'Foo')
+        req.create_entity('Person', name=u'Titi', salaire=1000,
+                          travaille=self.societe, birth_year=2001)
+        self.tata = req.create_entity('Person', name=u'Tata', salaire=2000,
+                                      travaille=self.societe, birth_year=1990)
+    def assert_total_salary(self, req, expected):
+        """Check `salaire_total` of the company named "Foo" is `expected`."""
+        rset = req.execute('Any S WHERE X salaire_total S, X nom "Foo"')
+        self.assertEqual(rset[0][0], expected)
+    def test_update_on_add_remove_relation(self):
+        """check the rewriting of a computed attribute"""
+        with self.admin_access.web_request() as req:
+            self.setup_entities(req)
+            req.cnx.commit()
+            self.assert_total_salary(req, 3000)
+            # Add relation.
+            toto = req.create_entity('Person', name=u'Toto', salaire=1500,
+                                     travaille=self.societe, birth_year=1988)
+            req.cnx.commit()
+            self.assert_total_salary(req, 4500)
+            # Delete relation.
+            toto.cw_set(travaille=None)
+            req.cnx.commit()
+            self.assert_total_salary(req, 3000)
+    def test_recompute_on_attribute_update(self):
+        """check the modification of an attribute triggers the update of the
+        computed attributes that depend on it"""
+        with self.admin_access.web_request() as req:
+            self.setup_entities(req)
+            req.cnx.commit()
+            self.assert_total_salary(req, 3000)
+            # Update attribute.
+            self.tata.cw_set(salaire=1000)
+            req.cnx.commit()
+            self.assert_total_salary(req, 2000)
+    def test_init_on_entity_creation(self):
+        """check the computed attribute is initialized on entity creation"""
+        with self.admin_access.web_request() as req:
+            req.create_entity('Person', name=u'Tata', salaire=2000,
+                              birth_year=1990)
+            req.cnx.commit()
+            rset = req.execute('Any A, X WHERE X age A, X name "Tata"')
+            self.assertEqual(rset[0][0], 2014 - 1990)
+if __name__ == '__main__':
+    # run this module's tests when it is executed as a script
+    from logilab.common.testlib import unittest_main
+    unittest_main()
--- a/schema.py	Mon Apr 28 14:11:23 2014 +0200
+++ b/schema.py	Tue Sep 16 16:39:23 2014 +0200
@@ -144,6 +144,44 @@
     return u', '.join(' '.join(expr.split()) for expr in rqlstring.split(','))
+def _check_valid_formula(rdef, formula_rqlst):
+    """Ensure the formula of the computed attribute `rdef` is acceptable.
+    The parsed formula `formula_rqlst` must hold a single select (no UNION)
+    selecting exactly one term, whose type must be the same in every solution
+    and equal to the declared attribute type. `BadSchemaDefinition` is raised
+    otherwise.
+    """
+    def bad_formula(detail, **extra):
+        # build the exception, prefixing `detail` with the attribute location
+        values = {'attr' : rdef.rtype, 'etype' : rdef.subject.type}
+        values.update(extra)
+        return BadSchemaDefinition(
+            ('computed attribute %(attr)s on %(etype)s: ' + detail) % values)
+    if len(formula_rqlst.children) != 1:
+        raise bad_formula('can not use UNION in formula %(form)r',
+                          form=rdef.formula)
+    select = formula_rqlst.children[0]
+    if len(select.selection) != 1:
+        raise bad_formula('can only select one term in formula %(form)r',
+                          form=rdef.formula)
+    selected_term = select.selection[0]
+    types = set(selected_term.get_type(sol) for sol in select.solutions)
+    if len(types) != 1:
+        raise bad_formula('multiple possible types (%(types)s) for formula %(form)r',
+                          types=list(types), form=rdef.formula)
+    (computed_type,) = types
+    expected_type = rdef.object.type
+    if computed_type != expected_type:
+        raise bad_formula('computed attribute type (%(comp_type)s) mismatch with '
+                          'specified type (%(attr_type)s)',
+                          comp_type=computed_type, attr_type=expected_type)
 class RQLExpression(object):
     """Base class for RQL expression used in schema (constraints and
@@ -1010,6 +1048,12 @@
     def schema_by_eid(self, eid):
         return self._eid_index[eid]
+    def iter_computed_attributes(self):
+        """Yield every final relation definition of the schema that has a
+        formula.
+        """
+        for rschema in self.relations():
+            for rdef in rschema.rdefs.itervalues():
+                if not rdef.final or rdef.formula is None:
+                    continue
+                yield rdef
     def iter_computed_relations(self):
         for relation in self.relations():
             if relation.rule:
@@ -1021,51 +1065,17 @@
     def finalize_computed_attributes(self):
-        """Check consistency of computed attributes types"""
+        """Check the validity of every computed attribute formula (if any)
+        and raise `BadSchemaDefinition` on the first invalid one.
+        The select node of each valid formula is stored on its relation
+        definition as `formula_select`.
+        """
         analyzer = ETypeResolver(self)
-        for relation in self.relations():
-            for rdef in relation.rdefs.itervalues():
-                if rdef.final and rdef.formula is not None:
-                    computed_etype = rdef.subject.type
-                    computed_attr = rdef.rtype
-                    rqlst = parse(rdef.formula)
-                    if len(rqlst.children) != 1:
-                        raise BadSchemaDefinition(
-                            'computed attribute %(attr)s on %(etype)s: '
-                            'can not use UNION in formula %(form)r' %
-                            dict(attr=computed_attr,
-                                 etype=computed_etype,
-                                 form=rdef.formula))
-                    select = rqlst.children[0]
-                    analyzer.visit(select)
-                    if len(select.selection) != 1:
-                        raise BadSchemaDefinition(
-                            'computed attribute %(attr)s on %(etype)s: '
-                            'can only select one term in formula %(form)r' %
-                            dict(attr=computed_attr,
-                                 etype=computed_etype,
-                                 form=rdef.formula))
-                    term = select.selection[0]
-                    types = set(term.get_type(sol) for sol in select.solutions)
-                    if len(types) != 1:
-                        raise BadSchemaDefinition(
-                            'computed attribute %(attr)s on %(etype)s: '
-                            'multiple possible types (%(types)s) for formula %(form)s' %
-                            dict(attr=computed_attr,
-                                 etype=computed_etype,
-                                 types=list(types),
-                                 form=rdef.formula))
-                    computed_type = types.pop()
-                    expected_type = rdef.object.type
-                    if computed_type != expected_type:
-                        raise BadSchemaDefinition(
-                            'computed attribute %(attr)s on %(etype)s: '
-                            'computed attribute type (%(comp_type)s) mismatch with '
-                            'specified type (%(attr_type)s)' %
-                            dict(attr=computed_attr,
-                                 etype=computed_etype,
-                                 comp_type=computed_type,
-                                 attr_type=expected_type))
+        for rdef in self.iter_computed_attributes():
+            rqlst = parse(rdef.formula)
+            # only the first select is analyzed: formulas holding several
+            # selects (UNION) are rejected by _check_valid_formula below
+            select = rqlst.children[0]
+            # presumably computes `select.solutions`, which
+            # _check_valid_formula reads -- TODO confirm
+            analyzer.visit(select)
+            _check_valid_formula(rdef, rqlst)
+            rdef.formula_select = select # avoid later recomputation
     def finalize_computed_relations(self):
         """Build relation definitions for computed relations
@@ -1353,6 +1363,7 @@
     # only defining here to prevent pylint from complaining
     info = warning = error = critical = exception = debug = lambda msg,*a,**kw: None
 set_log_methods(CubicWebSchemaLoader, getLogger('cubicweb.schemaloader'))
 set_log_methods(BootstrapSchemaLoader, getLogger('cubicweb.bootstrapschemaloader'))
 set_log_methods(RQLExpression, getLogger('cubicweb.schema'))
--- a/test/unittest_schema.py	Mon Apr 28 14:11:23 2014 +0200
+++ b/test/unittest_schema.py	Tue Sep 16 16:39:23 2014 +0200
@@ -539,5 +539,6 @@
                              sorted([(r.rtype.type, r.subject.type, r.object.type, role)
                                      for r, role in sorted(schema[etype].composite_rdef_roles)])
 if __name__ == '__main__':