[CWEP002] Add support for computed attribute synchronization
Related to #3546717.
Test and handle the behaviour with several formulas and identified use cases. To
do so add a birth year and a computed attribute age to Person in the test
schema.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hooks/synccomputed.py Tue Sep 16 16:39:23 2014 +0200
@@ -0,0 +1,227 @@
+# copyright 2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
+#
+# This file is part of CubicWeb.
+#
+# CubicWeb is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 2.1 of the License, or (at your option)
+# any later version.
+#
+# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License along
+# with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
+"""Hooks for synchronizing computed attributes"""
+
+__docformat__ = "restructuredtext en"
+_ = unicode
+
+from collections import defaultdict
+
+from rql import nodes
+
+from cubicweb.server import hook
+
+
+class RecomputeAttributeOperation(hook.DataOperationMixIn, hook.Operation):
+ """Operation to recompute caches of computed attribute at commit time,
+ depending on what's have been modified in the transaction and avoiding to
+ recompute twice the same attribute
+ """
+ containercls = dict
+ def add_data(self, computed_attribute, eid=None):
+ try:
+ self._container[computed_attribute].add(eid)
+ except KeyError:
+ self._container[computed_attribute] = set((eid,))
+
+ def precommit_event(self):
+ for computed_attribute_rdef, eids in self.get_data().iteritems():
+ attr = computed_attribute_rdef.rtype
+ formula = computed_attribute_rdef.formula
+ rql = formula.replace('Any ', 'Any X, ', 1)
+ kwargs = None
+ # add constraint on X to the formula
+ if None in eids : # recompute for all etype if None is found
+ rql += ', X is %s' % computed_attribute_rdef.subject
+ elif len(eids) == 1:
+ rql += ', X eid %(x)s'
+ kwargs = {'x': eids.pop()}
+ else:
+ rql += ', X eid IN (%s)' % ', '.join((str(eid) for eid in eids))
+ update_rql = 'SET X %s %%(value)s WHERE X eid %%(x)s' % attr
+ for eid, value in self.cnx.execute(rql, kwargs):
+ self.cnx.execute(update_rql, {'value': value, 'x': eid})
+
+
+class EntityWithCACreatedHook(hook.Hook):
+ """When creating an entity that has some computed attribute, those
+ attributes have to be computed.
+
+ Concret class of this hook are generated at registration time by
+ introspecting the schema.
+ """
+ __abstract__ = True
+ events = ('after_add_entity',)
+ # list of computed attribute rdefs that have to be recomputed
+ computed_attributes = None
+
+ def __call__(self):
+ for rdef in self.computed_attributes:
+ RecomputeAttributeOperation.get_instance(self._cw).add_data(
+ rdef, self.entity.eid)
+
+
+class RelationInvolvedInCAModifiedHook(hook.Hook):
+ """When some relation used in a computed attribute is updated, those
+ attributes have to be recomputed.
+
+ Concret class of this hook are generated at registration time by
+ introspecting the schema.
+ """
+ __abstract__ = True
+ events = ('after_add_relation', 'before_delete_relation')
+ # list of (computed attribute rdef, optimize_on) that have to be recomputed
+ optimized_computed_attributes = None
+
+ def __call__(self):
+ for rdef, optimize_on in self.optimized_computed_attributes:
+ if optimize_on is None:
+ eid = None
+ else:
+ eid = getattr(self, optimize_on)
+ RecomputeAttributeOperation.get_instance(self._cw).add_data(rdef, eid)
+
+
+class AttributeInvolvedInCAModifiedHook(hook.Hook):
+ """When some attribute used in a computed attribute is updated, those
+ attributes have to be recomputed.
+
+ Concret class of this hook are generated at registration time by
+ introspecting the schema.
+ """
+ __abstract__ = True
+ events = ('after_update_entity',)
+ # list of (computed attribute rdef, attributes of this entity type involved)
+ # that may have to be recomputed
+ attributes_computed_attributes = None
+
+ def __call__(self):
+ edited_attributes = frozenset(self.entity.cw_edited)
+ for rdef, used_attributes in self.attributes_computed_attributes.iteritems():
+ if edited_attributes.intersection(used_attributes):
+ # XXX optimize if the modified attributes belong to the same
+ # entity as the computed attribute
+ RecomputeAttributeOperation.get_instance(self._cw).add_data(rdef)
+
+
+# code generation at registration time #########################################
+
+def _optimize_on(formula_select, rtype):
+ """Given a formula and some rtype, tells whether on update of the given
+ relation, formula may be recomputed only for rhe relation's subject
+ ('eidfrom' returned), object ('eidto' returned) or None.
+
+ Optimizing is only possible when X is used as direct subject/object of this
+ relation, else we may miss some necessary update.
+ """
+ for rel in formula_select.get_nodes(nodes.Relation):
+ if rel.r_type == rtype:
+ sub = rel.get_variable_parts()[0]
+ obj = rel.get_variable_parts()[1]
+ if sub.name == 'X':
+ return 'eidfrom'
+ elif obj.name == 'X':
+ return 'eidto'
+ else:
+ return None
+
+
+class _FormulaDependenciesMatrix(object):
+ """This class computes and represents the dependencies of computed attributes
+ towards relations and attributes
+ """
+
+ def __init__(self, schema):
+ """Analyzes the schema to compute the dependencies"""
+ # entity types holding some computed attribute {etype: [computed rdefs]}
+ self.computed_attribute_by_etype = defaultdict(list)
+ # depending entity types {dep. etype: {computed rdef: dep. etype attributes}}
+ self.computed_attribute_by_etype_attrs = defaultdict(lambda: defaultdict(set))
+ # depending relations def {dep. rdef: [computed rdefs]
+ self.computed_attribute_by_relation = defaultdict(list) # by rdef
+ # Walk through all attributes definitions
+ for rdef in schema.iter_computed_attributes():
+ self.computed_attribute_by_etype[rdef.subject.type].append(rdef)
+ # extract the relations it depends upon - `rdef.formula_select` is
+ # expected to have been set by finalize_computed_attributes
+ select = rdef.formula_select
+ for rel_node in select.get_nodes(nodes.Relation):
+ rschema = schema.rschema(rel_node.r_type)
+ lhs, rhs = rel_node.get_variable_parts()
+ for sol in select.solutions:
+ subject_etype = sol[lhs.name]
+ if isinstance(rhs, nodes.VariableRef):
+ object_etypes = set(sol[rhs.name] for sol in select.solutions)
+ else:
+ object_etypes = rschema.objects(subject_etype)
+ for object_etype in object_etypes:
+ if rschema.final:
+ attr_for_computations = self.computed_attribute_by_etype_attrs[subject_etype]
+ attr_for_computations[rdef].add(rschema.type)
+ else:
+ depend_on_rdef = rschema.rdefs[subject_etype, object_etype]
+ self.computed_attribute_by_relation[depend_on_rdef].append(rdef)
+
+ def generate_entity_creation_hooks(self):
+ for etype, computed_attributes in self.computed_attribute_by_etype.iteritems():
+ regid = 'computed_attribute.%s_created' % etype
+ selector = hook.is_instance(etype)
+ yield type('%sCreatedHook' % etype,
+ (EntityWithCACreatedHook,),
+ {'__regid__': regid,
+ '__select__': hook.Hook.__select__ & selector,
+ 'computed_attributes': computed_attributes})
+
+ def generate_relation_change_hooks(self):
+ for rdef, computed_attributes in self.computed_attribute_by_relation.iteritems():
+ regid = 'computed_attribute.%s_modified' % rdef.rtype
+ selector = hook.match_rtype(rdef.rtype.type,
+ frometypes=(rdef.subject.type,),
+ toetypes=(rdef.object.type,))
+ optimized_computed_attributes = []
+ for computed_rdef in computed_attributes:
+ optimized_computed_attributes.append(
+ (computed_rdef,
+ _optimize_on(computed_rdef.formula_select, rdef.rtype))
+ )
+ yield type('%sModifiedHook' % rdef.rtype,
+ (RelationInvolvedInCAModifiedHook,),
+ {'__regid__': regid,
+ '__select__': hook.Hook.__select__ & selector,
+ 'optimized_computed_attributes': optimized_computed_attributes})
+
+ def generate_entity_update_hooks(self):
+ for etype, attributes_computed_attributes in self.computed_attribute_by_etype_attrs.iteritems():
+ regid = 'computed_attribute.%s_updated' % etype
+ selector = hook.is_instance(etype)
+ yield type('%sModifiedHook' % etype,
+ (AttributeInvolvedInCAModifiedHook,),
+ {'__regid__': regid,
+ '__select__': hook.Hook.__select__ & selector,
+ 'attributes_computed_attributes': attributes_computed_attributes})
+
+
+def registration_callback(vreg):
+ vreg.register_all(globals().values(), __name__)
+ dependencies = _FormulaDependenciesMatrix(vreg.schema)
+ for hook_class in dependencies.generate_entity_creation_hooks():
+ vreg.register(hook_class)
+ for hook_class in dependencies.generate_relation_change_hooks():
+ vreg.register(hook_class)
+ for hook_class in dependencies.generate_entity_update_hooks():
+ vreg.register(hook_class)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hooks/test/data-computed/schema.py Tue Sep 16 16:39:23 2014 +0200
@@ -0,0 +1,31 @@
+# copyright 2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
+#
+# This file is part of CubicWeb.
+#
+# CubicWeb is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 2.1 of the License, or (at your option)
+# any later version.
+#
+# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License along
+# with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
+from yams.buildobjs import EntityType, String, Int, SubjectRelation
+
+THISYEAR = 2014
+
+class Person(EntityType):
+ name = String()
+ salaire = Int()
+ birth_year = Int(required=True)
+ travaille = SubjectRelation('Societe')
+ age = Int(formula='Any %d - D WHERE X birth_year D' % THISYEAR)
+
+class Societe(EntityType):
+ nom = String()
+ salaire_total = Int(formula='Any SUM(SA) GROUPBY X WHERE P travaille X, P salaire SA')
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hooks/test/unittest_synccomputed.py Tue Sep 16 16:39:23 2014 +0200
@@ -0,0 +1,139 @@
+# copyright 2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
+#
+# This file is part of CubicWeb.
+#
+# CubicWeb is free software: you can redistribute it and/or modify it under the
+# terms of the GNU Lesser General Public License as published by the Free
+# Software Foundation, either version 2.1 of the License, or (at your option)
+# any later version.
+#
+# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Lesser General Public License along
+# with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
+"""unit tests for computed attributes/relations hooks"""
+
+from unittest import TestCase
+
+from yams.buildobjs import EntityType, String, Int, SubjectRelation
+
+from cubicweb.devtools.testlib import CubicWebTC
+from cubicweb.schema import build_schema_from_namespace
+
+
+class FormulaDependenciesMatrixTC(TestCase):
+
+ def simple_schema(self):
+ THISYEAR = 2014
+
+ class Person(EntityType):
+ name = String()
+ salary = Int()
+ birth_year = Int(required=True)
+ works_for = SubjectRelation('Company')
+ age = Int(formula='Any %d - D WHERE X birth_year D' % THISYEAR)
+
+ class Company(EntityType):
+ name = String()
+ total_salary = Int(formula='Any SUM(SA) GROUPBY X WHERE P works_for X, P salary SA')
+
+ schema = build_schema_from_namespace(vars().items())
+ return schema
+
+ def setUp(self):
+ from cubicweb.hooks.synccomputed import _FormulaDependenciesMatrix
+ self.schema = self.simple_schema()
+ self.dependencies = _FormulaDependenciesMatrix(self.schema)
+
+ def test_computed_attributes_by_etype(self):
+ comp_by_etype = self.dependencies.computed_attribute_by_etype
+ self.assertEqual(len(comp_by_etype), 2)
+ values = comp_by_etype['Person']
+ self.assertEqual(len(values), 1)
+ self.assertEqual(values[0].rtype, 'age')
+ values = comp_by_etype['Company']
+ self.assertEqual(len(values), 1)
+ self.assertEqual(values[0].rtype, 'total_salary')
+
+ def test_computed_attribute_by_relation(self):
+ comp_by_rdef = self.dependencies.computed_attribute_by_relation
+ self.assertEqual(len(comp_by_rdef), 1)
+ key, values = iter(comp_by_rdef.iteritems()).next()
+ self.assertEqual(key.rtype, 'works_for')
+ self.assertEqual(len(values), 1)
+ self.assertEqual(values[0].rtype, 'total_salary')
+
+ def test_computed_attribute_by_etype_attrs(self):
+ comp_by_attr = self.dependencies.computed_attribute_by_etype_attrs
+ self.assertEqual(len(comp_by_attr), 1)
+ values = comp_by_attr['Person']
+ self.assertEqual(len(values), 2)
+ values = set((rdef.formula, tuple(v))
+ for rdef, v in values.iteritems())
+ self.assertEquals(values,
+ set((('Any 2014 - D WHERE X birth_year D', tuple(('birth_year',))),
+ ('Any SUM(SA) GROUPBY X WHERE P works_for X, P salary SA', tuple(('salary',)))))
+ )
+
+
+class ComputedAttributeTC(CubicWebTC):
+ appid = 'data-computed'
+
+ def setup_entities(self, req):
+ self.societe = req.create_entity('Societe', nom=u'Foo')
+ req.create_entity('Person', name=u'Titi', salaire=1000,
+ travaille=self.societe, birth_year=2001)
+ self.tata = req.create_entity('Person', name=u'Tata', salaire=2000,
+ travaille=self.societe, birth_year=1990)
+
+
+ def test_update_on_add_remove_relation(self):
+ """check the rewriting of a computed attribute"""
+ with self.admin_access.web_request() as req:
+ self.setup_entities(req)
+ req.cnx.commit()
+ rset = req.execute('Any S WHERE X salaire_total S, X nom "Foo"')
+ self.assertEqual(rset[0][0], 3000)
+ # Add relation.
+ toto = req.create_entity('Person', name=u'Toto', salaire=1500,
+ travaille=self.societe, birth_year=1988)
+ req.cnx.commit()
+ rset = req.execute('Any S WHERE X salaire_total S, X nom "Foo"')
+ self.assertEqual(rset[0][0], 4500)
+ # Delete relation.
+ toto.cw_set(travaille=None)
+ req.cnx.commit()
+ rset = req.execute('Any S WHERE X salaire_total S, X nom "Foo"')
+ self.assertEqual(rset[0][0], 3000)
+
+ def test_recompute_on_attribute_update(self):
+ """check the modification of an attribute triggers the update of the
+ computed attributes that depend on it"""
+ with self.admin_access.web_request() as req:
+ self.setup_entities(req)
+ req.cnx.commit()
+ rset = req.execute('Any S WHERE X salaire_total S, X nom "Foo"')
+ self.assertEqual(rset[0][0], 3000)
+ # Update attribute.
+ self.tata.cw_set(salaire=1000)
+ req.cnx.commit()
+ rset = req.execute('Any S WHERE X salaire_total S, X nom "Foo"')
+ self.assertEqual(rset[0][0], 2000)
+
+ def test_init_on_entity_creation(self):
+ """check the computed attribute is initialized on entity creation"""
+ with self.admin_access.web_request() as req:
+ p = req.create_entity('Person', name=u'Tata', salaire=2000,
+ birth_year=1990)
+ req.cnx.commit()
+ rset = req.execute('Any A, X WHERE X age A, X name "Tata"')
+ self.assertEqual(rset[0][0], 2014 - 1990)
+
+
+if __name__ == '__main__':
+ from logilab.common.testlib import unittest_main
+ unittest_main()
--- a/schema.py Mon Apr 28 14:11:23 2014 +0200
+++ b/schema.py Tue Sep 16 16:39:23 2014 +0200
@@ -144,6 +144,44 @@
return u', '.join(' '.join(expr.split()) for expr in rqlstring.split(','))
+def _check_valid_formula(rdef, formula_rqlst):
+ """Check the formula is a valid RQL query with some restriction (no union,
+ single selected node, etc.), raise BadSchemaDefinition if not
+ """
+ if len(formula_rqlst.children) != 1:
+ raise BadSchemaDefinition('computed attribute %(attr)s on %(etype)s: '
+ 'can not use UNION in formula %(form)r' %
+ {'attr' : rdef.rtype,
+ 'etype' : rdef.subject.type,
+ 'form' : rdef.formula})
+ select = formula_rqlst.children[0]
+ if len(select.selection) != 1:
+ raise BadSchemaDefinition('computed attribute %(attr)s on %(etype)s: '
+ 'can only select one term in formula %(form)r' %
+ {'attr' : rdef.rtype,
+ 'etype' : rdef.subject.type,
+ 'form' : rdef.formula})
+ term = select.selection[0]
+ types = set(term.get_type(sol) for sol in select.solutions)
+ if len(types) != 1:
+ raise BadSchemaDefinition('computed attribute %(attr)s on %(etype)s: '
+ 'multiple possible types (%(types)s) for formula %(form)r' %
+ {'attr' : rdef.rtype,
+ 'etype' : rdef.subject.type,
+ 'types' : list(types),
+ 'form' : rdef.formula})
+ computed_type = types.pop()
+ expected_type = rdef.object.type
+ if computed_type != expected_type:
+ raise BadSchemaDefinition('computed attribute %(attr)s on %(etype)s: '
+ 'computed attribute type (%(comp_type)s) mismatch with '
+ 'specified type (%(attr_type)s)' %
+ {'attr' : rdef.rtype,
+ 'etype' : rdef.subject.type,
+ 'comp_type' : computed_type,
+ 'attr_type' : expected_type})
+
+
class RQLExpression(object):
"""Base class for RQL expression used in schema (constraints and
permissions)
@@ -1010,6 +1048,12 @@
def schema_by_eid(self, eid):
return self._eid_index[eid]
+ def iter_computed_attributes(self):
+ for relation in self.relations():
+ for rdef in relation.rdefs.itervalues():
+ if rdef.final and rdef.formula is not None:
+ yield rdef
+
def iter_computed_relations(self):
for relation in self.relations():
if relation.rule:
@@ -1021,51 +1065,17 @@
self.finalize_computed_relations()
def finalize_computed_attributes(self):
- """Check consistency of computed attributes types"""
+ """Check computed attributes validity (if any), else raise
+ `BadSchemaDefinition`
+ """
analyzer = ETypeResolver(self)
- for relation in self.relations():
- for rdef in relation.rdefs.itervalues():
- if rdef.final and rdef.formula is not None:
- computed_etype = rdef.subject.type
- computed_attr = rdef.rtype
- rqlst = parse(rdef.formula)
- if len(rqlst.children) != 1:
- raise BadSchemaDefinition(
- 'computed attribute %(attr)s on %(etype)s: '
- 'can not use UNION in formula %(form)r' %
- dict(attr=computed_attr,
- etype=computed_etype,
- form=rdef.formula))
- select = rqlst.children[0]
- analyzer.visit(select)
- if len(select.selection) != 1:
- raise BadSchemaDefinition(
- 'computed attribute %(attr)s on %(etype)s: '
- 'can only select one term in formula %(form)r' %
- dict(attr=computed_attr,
- etype=computed_etype,
- form=rdef.formula))
- term = select.selection[0]
- types = set(term.get_type(sol) for sol in select.solutions)
- if len(types) != 1:
- raise BadSchemaDefinition(
- 'computed attribute %(attr)s on %(etype)s: '
- 'multiple possible types (%(types)s) for formula %(form)s' %
- dict(attr=computed_attr,
- etype=computed_etype,
- types=list(types),
- form=rdef.formula))
- computed_type = types.pop()
- expected_type = rdef.object.type
- if computed_type != expected_type:
- raise BadSchemaDefinition(
- 'computed attribute %(attr)s on %(etype)s: '
- 'computed attribute type (%(comp_type)s) mismatch with '
- 'specified type (%(attr_type)s)' %
- dict(attr=computed_attr,
- etype=computed_etype,
- comp_type=computed_type,
- attr_type=expected_type))
+ for rdef in self.iter_computed_attributes():
+ rqlst = parse(rdef.formula)
+ select = rqlst.children[0]
+ analyzer.visit(select)
+ _check_valid_formula(rdef, rqlst)
+ rdef.formula_select = select # avoid later recomputation
+
def finalize_computed_relations(self):
"""Build relation definitions for computed relations
@@ -1353,6 +1363,7 @@
# only defining here to prevent pylint from complaining
info = warning = error = critical = exception = debug = lambda msg,*a,**kw: None
+
set_log_methods(CubicWebSchemaLoader, getLogger('cubicweb.schemaloader'))
set_log_methods(BootstrapSchemaLoader, getLogger('cubicweb.bootstrapschemaloader'))
set_log_methods(RQLExpression, getLogger('cubicweb.schema'))
--- a/test/unittest_schema.py Mon Apr 28 14:11:23 2014 +0200
+++ b/test/unittest_schema.py Tue Sep 16 16:39:23 2014 +0200
@@ -539,5 +539,6 @@
sorted([(r.rtype.type, r.subject.type, r.object.type, role)
for r, role in sorted(schema[etype].composite_rdef_roles)])
+
if __name__ == '__main__':
unittest_main()