[repo] improve planning of insert/update queries: do not select affected constants so the don't go and back to/from the source.
--- a/server/querier.py Wed Mar 03 17:56:04 2010 +0100
+++ b/server/querier.py Wed Mar 03 17:59:05 2010 +0100
@@ -22,9 +22,8 @@
from cubicweb.server.utils import cleanup_solutions
from cubicweb.server.rqlannotation import SQLGenAnnotator, set_qdata
-from cubicweb.server.ssplanner import add_types_restriction
+from cubicweb.server.ssplanner import READ_ONLY_RTYPES, add_types_restriction
-READ_ONLY_RTYPES = set(('eid', 'has_text', 'is', 'is_instance_of', 'identity'))
def empty_rset(rql, args, rqlst=None):
"""build an empty result set object"""
@@ -377,39 +376,6 @@
self._r_obj_index = {}
self._expanded_r_defs = {}
- def relation_definitions(self, rqlst, to_build):
- """add constant values to entity def, mark variables to be selected
- """
- to_select = {}
- for relation in rqlst.main_relations:
- lhs, rhs = relation.get_variable_parts()
- rtype = relation.r_type
- if rtype in READ_ONLY_RTYPES:
- raise QueryError("can't assign to %s" % rtype)
- try:
- edef = to_build[str(lhs)]
- except KeyError:
- # lhs var is not to build, should be selected and added as an
- # object relation
- edef = to_build[str(rhs)]
- to_select.setdefault(edef, []).append((rtype, lhs, 1))
- else:
- if isinstance(rhs, Constant) and not rhs.uid:
- # add constant values to entity def
- value = rhs.eval(self.args)
- eschema = edef.e_schema
- attrtype = eschema.subjrels[rtype].objects(eschema)[0]
- if attrtype == 'Password' and isinstance(value, unicode):
- value = value.encode('UTF8')
- edef[rtype] = value
- elif to_build.has_key(str(rhs)):
- # create a relation between two newly created variables
- self.add_relation_def((edef, rtype, to_build[rhs.name]))
- else:
- to_select.setdefault(edef, []).append( (rtype, rhs, 0) )
- return to_select
-
-
def add_entity_def(self, edef):
"""add an entity definition to build"""
edef.querier_pending_relations = {}
--- a/server/ssplanner.py Wed Mar 03 17:56:04 2010 +0100
+++ b/server/ssplanner.py Wed Mar 03 17:59:05 2010 +0100
@@ -10,12 +10,72 @@
from copy import copy
from rql.stmts import Union, Select
-from rql.nodes import Constant
+from rql.nodes import Constant, Relation
from cubicweb import QueryError, typed_eid
from cubicweb.schema import VIRTUAL_RTYPES
from cubicweb.rqlrewrite import add_types_restriction
+READ_ONLY_RTYPES = set(('eid', 'has_text', 'is', 'is_instance_of', 'identity'))
+
+_CONSTANT = object()
+_FROM_SUBSTEP = object()
+
+def _extract_const_attributes(plan, rqlst, to_build):
+ """add constant values to entity def, mark variables to be selected
+ """
+ to_select = {}
+ for relation in rqlst.main_relations:
+ lhs, rhs = relation.get_variable_parts()
+ rtype = relation.r_type
+ if rtype in READ_ONLY_RTYPES:
+ raise QueryError("can't assign to %s" % rtype)
+ try:
+ edef = to_build[str(lhs)]
+ except KeyError:
+ # lhs var is not to build, should be selected and added as an
+ # object relation
+ edef = to_build[str(rhs)]
+ to_select.setdefault(edef, []).append((rtype, lhs, 1))
+ else:
+ if isinstance(rhs, Constant) and not rhs.uid:
+ # add constant values to entity def
+ value = rhs.eval(plan.args)
+ eschema = edef.e_schema
+ attrtype = eschema.subjrels[rtype].objects(eschema)[0]
+ if attrtype == 'Password' and isinstance(value, unicode):
+ value = value.encode('UTF8')
+ edef[rtype] = value
+ elif to_build.has_key(str(rhs)):
+ # create a relation between two newly created variables
+ plan.add_relation_def((edef, rtype, to_build[rhs.name]))
+ else:
+ to_select.setdefault(edef, []).append( (rtype, rhs, 0) )
+ return to_select
+
+def _extract_eid_consts(plan, rqlst):
+ """return a dict mapping rqlst variable object to their eid if specified in
+ the syntax tree
+ """
+ session = plan.session
+ eschema = session.vreg.schema.eschema
+ if rqlst.where is None:
+ return {}
+ eidconsts = {}
+ neweids = session.transaction_data.get('neweids', ())
+ for rel in rqlst.where.get_nodes(Relation):
+ if rel.r_type == 'eid' and not rel.neged(strict=True):
+ lhs, rhs = rel.get_variable_parts()
+ if isinstance(rhs, Constant):
+ eid = typed_eid(rhs.eval(plan.args))
+ # check read permission here since it may not be done by
+ # the generated select substep if not emited (eg nothing
+ # to be selected)
+ if eid not in neweids:
+ eschema(session.describe(eid)[0]).check_perm(session, 'read')
+ eidconsts[lhs.variable] = eid
+ return eidconsts
+
class SSPlanner(object):
"""SingleSourcePlanner: build execution plan for rql queries
@@ -56,34 +116,38 @@
to_build[var.name] = etype_class(etype)(session)
plan.add_entity_def(to_build[var.name])
# add constant values to entity def, mark variables to be selected
- to_select = plan.relation_definitions(rqlst, to_build)
+ to_select = _extract_const_attributes(plan, rqlst, to_build)
# add necessary steps to add relations and update attributes
step = InsertStep(plan) # insert each entity and its relations
- step.children += self._compute_relation_steps(plan, rqlst.solutions,
- rqlst.where, to_select)
+ step.children += self._compute_relation_steps(plan, rqlst, to_select)
return (step,)
- def _compute_relation_steps(self, plan, solutions, restriction, to_select):
+ def _compute_relation_steps(self, plan, rqlst, to_select):
"""handle the selection of relations for an insert query"""
+ eidconsts = _extract_eid_consts(plan, rqlst)
for edef, rdefs in to_select.items():
# create a select rql st to fetch needed data
select = Select()
eschema = edef.e_schema
- for i in range(len(rdefs)):
- rtype, term, reverse = rdefs[i]
- select.append_selected(term.copy(select))
+ for i, (rtype, term, reverse) in enumerate(rdefs):
+ if getattr(term, 'variable', None) in eidconsts:
+ value = eidconsts[term.variable]
+ else:
+ select.append_selected(term.copy(select))
+ value = _FROM_SUBSTEP
if reverse:
- rdefs[i] = rtype, RelationsStep.REVERSE_RELATION
+ rdefs[i] = (rtype, InsertRelationsStep.REVERSE_RELATION, value)
else:
rschema = eschema.subjrels[rtype]
if rschema.final or rschema.inlined:
- rdefs[i] = rtype, RelationsStep.FINAL
+ rdefs[i] = (rtype, InsertRelationsStep.FINAL, value)
else:
- rdefs[i] = rtype, RelationsStep.RELATION
- if restriction is not None:
- select.set_where(restriction.copy(select))
- step = RelationsStep(plan, edef, rdefs)
- step.children += self._select_plan(plan, select, solutions)
+ rdefs[i] = (rtype, InsertRelationsStep.RELATION, value)
+ step = InsertRelationsStep(plan, edef, rdefs)
+ if select.selection:
+ if rqlst.where is not None:
+ select.set_where(rqlst.where.copy(select))
+ step.children += self._select_plan(plan, select, rqlst.solutions)
yield step
def build_delete_plan(self, plan, rqlst):
@@ -127,37 +191,62 @@
def build_set_plan(self, plan, rqlst):
"""get an execution plan from an SET RQL query"""
- select = Select()
- # extract variables to add to the selection
- selected_index = {}
- index = 0
- relations, attrrelations = [], []
getrschema = self.schema.rschema
- for relation in rqlst.main_relations:
+ select = Select() # potential substep query
+ selectedidx = {} # local state
+ attributes = set() # edited attributes
+ updatedefs = [] # definition of update attributes/relations
+ selidx = residx = 0 # substep selection / resulting rset indexes
+ # search for eid const in the WHERE clause
+ eidconsts = _extract_eid_consts(plan, rqlst)
+ # build `updatedefs` describing things to update and add necessary
+ # variables to the substep selection
+ for i, relation in enumerate(rqlst.main_relations):
if relation.r_type in VIRTUAL_RTYPES:
raise QueryError('can not assign to %r relation'
% relation.r_type)
lhs, rhs = relation.get_variable_parts()
- if not lhs.as_string('utf-8') in selected_index:
- select.append_selected(lhs.copy(select))
- selected_index[lhs.as_string('utf-8')] = index
- index += 1
- if not rhs.as_string('utf-8') in selected_index:
- select.append_selected(rhs.copy(select))
- selected_index[rhs.as_string('utf-8')] = index
- index += 1
+ lhskey = lhs.as_string('utf-8')
+ if not lhskey in selectedidx:
+ if lhs.variable in eidconsts:
+ eid = eidconsts[lhs.variable]
+ lhsinfo = (_CONSTANT, eid, residx)
+ else:
+ select.append_selected(lhs.copy(select))
+ lhsinfo = (_FROM_SUBSTEP, selidx, residx)
+ selidx += 1
+ residx += 1
+ selectedidx[lhskey] = lhsinfo
+ else:
+ lhsinfo = selectedidx[lhskey][:-1] + (None,)
+ rhskey = rhs.as_string('utf-8')
+ if not rhskey in selectedidx:
+ if isinstance(rhs, Constant):
+ rhsinfo = (_CONSTANT, rhs.eval(plan.args), residx)
+ elif getattr(rhs, 'variable', None) in eidconsts:
+ eid = eidconsts[rhs.variable]
+ rhsinfo = (_CONSTANT, eid, residx)
+ else:
+ select.append_selected(rhs.copy(select))
+ rhsinfo = (_FROM_SUBSTEP, selidx, residx)
+ selidx += 1
+ residx += 1
+ selectedidx[rhskey] = rhsinfo
+ else:
+ rhsinfo = selectedidx[rhskey][:-1] + (None,)
rschema = getrschema(relation.r_type)
+ updatedefs.append( (lhsinfo, rhsinfo, rschema) )
if rschema.final or rschema.inlined:
- attrrelations.append(relation)
- else:
- relations.append(relation)
- # add step necessary to fetch all selected variables values
- if rqlst.where is not None:
- select.set_where(rqlst.where.copy(select))
- # set distinct to avoid potential duplicate key error
- select.distinct = True
- step = UpdateStep(plan, attrrelations, relations, selected_index)
- step.children += self._select_plan(plan, select, rqlst.solutions)
+ attributes.add(relation.r_type)
+ # the update step
+ step = UpdateStep(plan, updatedefs, attributes)
+ # when necessary add substep to fetch yet unknown values
+ if select.selection:
+ if rqlst.where is not None:
+ select.set_where(rqlst.where.copy(select))
+ # set distinct to avoid potential duplicate key error
+ select.distinct = True
+ step.children += self._select_plan(plan, select, rqlst.solutions)
return (step,)
# internal methods ########################################################
@@ -308,7 +397,7 @@
# UPDATE/INSERT/DELETE steps ##################################################
-class RelationsStep(Step):
+class InsertRelationsStep(Step):
"""step consisting in adding attributes/relations to entity defs from a
previous FetchStep
@@ -334,33 +423,38 @@
"""execute this step"""
base_edef = self.edef
edefs = []
- result = self.execute_child()
+ if self.children:
+ result = self.execute_child()
+ else:
+ result = [[]]
for row in result:
# get a new entity definition for this row
edef = copy(base_edef)
# complete this entity def using row values
- for i in range(len(self.rdefs)):
- rtype, rorder = self.rdefs[i]
- if rorder == RelationsStep.FINAL:
- edef[rtype] = row[i]
- elif rorder == RelationsStep.RELATION:
- self.plan.add_relation_def( (edef, rtype, row[i]) )
- edef.querier_pending_relations[(rtype, 'subject')] = row[i]
+ index = 0
+ for rtype, rorder, value in self.rdefs:
+ if value is _FROM_SUBSTEP:
+ value = row[index]
+ index += 1
+ if rorder == InsertRelationsStep.FINAL:
+ edef[rtype] = value
+ elif rorder == InsertRelationsStep.RELATION:
+ self.plan.add_relation_def( (edef, rtype, value) )
+ edef.querier_pending_relations[(rtype, 'subject')] = value
else:
- self.plan.add_relation_def( (row[i], rtype, edef) )
- edef.querier_pending_relations[(rtype, 'object')] = row[i]
+ self.plan.add_relation_def( (value, rtype, edef) )
+ edef.querier_pending_relations[(rtype, 'object')] = value
edefs.append(edef)
self.plan.substitute_entity_def(base_edef, edefs)
return result
-
class InsertStep(Step):
"""step consisting in inserting new entities / relations"""
def execute(self):
"""execute this step"""
for step in self.children:
- assert isinstance(step, RelationsStep)
+ assert isinstance(step, InsertRelationsStep)
step.plan = self.plan
step.execute()
# insert entities first
@@ -408,40 +502,46 @@
definitions and from results fetched in previous step
"""
- def __init__(self, plan, attribute_relations, relations, selected_index):
+ def __init__(self, plan, updatedefs, attributes):
Step.__init__(self, plan)
- self.attribute_relations = attribute_relations
- self.relations = relations
- self.selected_index = selected_index
+ self.updatedefs = updatedefs
+ self.attributes = attributes
def execute(self):
"""execute this step"""
- plan = self.plan
session = self.plan.session
repo = session.repo
edefs = {}
# insert relations
- attributes = set([relation.r_type for relation in self.attribute_relations])
- result = self.execute_child()
- for row in result:
- for relation in self.attribute_relations:
- lhs, rhs = relation.get_variable_parts()
- eid = typed_eid(row[self.selected_index[str(lhs)]])
- try:
- edef = edefs[eid]
- except KeyError:
- edefs[eid] = edef = session.entity_from_eid(eid)
- if isinstance(rhs, Constant):
- # add constant values to entity def
- value = rhs.eval(plan.args)
- edef[relation.r_type] = value
+ if self.children:
+ result = self.execute_child()
+ else:
+ result = [[]]
+ for i, row in enumerate(result):
+ newrow = []
+ for (lhsinfo, rhsinfo, rschema) in self.updatedefs:
+ lhsval = _handle_relterm(lhsinfo, row, newrow)
+ rhsval = _handle_relterm(rhsinfo, row, newrow)
+ if rschema.final or rschema.inlined:
+ eid = typed_eid(lhsval)
+ try:
+ edef = edefs[eid]
+ except KeyError:
+ edefs[eid] = edef = session.entity_from_eid(eid)
+ edef[str(rschema)] = rhsval
else:
- edef[relation.r_type] = row[self.selected_index[str(rhs)]]
- for relation in self.relations:
- subj = row[self.selected_index[str(relation.children[0])]]
- obj = row[self.selected_index[str(relation.children[1])]]
- repo.glob_add_relation(session, subj, relation.r_type, obj)
+ repo.glob_add_relation(session, lhsval, str(rschema), rhsval)
+ result[i] = newrow
# update entities
for eid, edef in edefs.iteritems():
- repo.glob_update_entity(session, edef, attributes)
+ repo.glob_update_entity(session, edef, self.attributes)
return result
+
+def _handle_relterm(info, row, newrow):
+ if info[0] is _CONSTANT:
+ val = info[1]
+ else: # _FROM_SUBSTEP
+ val = row[info[1]]
+ if info[-1] is not None:
+ newrow.append(val)
+ return val
--- a/server/test/unittest_msplanner.py Wed Mar 03 17:56:04 2010 +0100
+++ b/server/test/unittest_msplanner.py Wed Mar 03 17:59:05 2010 +0100
@@ -1520,15 +1520,11 @@
repo._type_source_cache[999999] = ('Note', 'cards', 999999)
repo._type_source_cache[999998] = ('State', 'system', None)
self._test('INSERT Note X: X in_state S, X type T WHERE S eid %(s)s, N eid %(n)s, N type T',
- [('FetchStep', [('Any T WHERE N eid 999999, N type T, N is Note',
- [{'N': 'Note', 'T': 'String'}])],
- [self.cards], None, {'N.type': 'table0.C0', 'T': 'table0.C0'}, []),
- ('InsertStep',
- [('RelationsStep',
- [('OneFetchStep', [('Any 999998,T WHERE N type T, N is Note',
+ [('InsertStep',
+ [('InsertRelationsStep',
+ [('OneFetchStep', [('Any T WHERE N eid 999999, N type T, N is Note',
[{'N': 'Note', 'T': 'String'}])],
- None, None, [self.system],
- {'N.type': 'table0.C0', 'T': 'table0.C0'}, [])])
+ None, None, [self.cards], {}, [])])
])
],
{'n': 999999, 's': 999998})
@@ -1537,15 +1533,11 @@
repo._type_source_cache[999999] = ('Note', 'cards', 999999)
repo._type_source_cache[999998] = ('State', 'system', None)
self._test('INSERT Note X: X in_state S, X type T, X migrated_from N WHERE S eid %(s)s, N eid %(n)s, N type T',
- [('FetchStep', [('Any T,N WHERE N eid 999999, N type T, N is Note',
- [{'N': 'Note', 'T': 'String'}])],
- [self.cards], None, {'N': 'table0.C1', 'N.type': 'table0.C0', 'T': 'table0.C0'}, []),
- ('InsertStep',
- [('RelationsStep',
- [('OneFetchStep', [('Any 999998,T,N WHERE N type T, N is Note',
+ [('InsertStep',
+ [('InsertRelationsStep',
+ [('OneFetchStep', [('Any T WHERE N eid 999999, N type T, N is Note',
[{'N': 'Note', 'T': 'String'}])],
- None, None, [self.system],
- {'N': 'table0.C1', 'N.type': 'table0.C0', 'T': 'table0.C0'}, [])
+ None, None, [self.cards], {}, [])
])
])
],
@@ -1556,8 +1548,8 @@
repo._type_source_cache[999998] = ('State', 'cards', 999998)
self._test('INSERT Note X: X in_state S, X type T WHERE S eid %(s)s, N eid %(n)s, N type T',
[('InsertStep',
- [('RelationsStep',
- [('OneFetchStep', [('Any 999998,T WHERE N eid 999999, N type T, N is Note',
+ [('InsertRelationsStep',
+ [('OneFetchStep', [('Any T WHERE N eid 999999, N type T, N is Note',
[{'N': 'Note', 'T': 'String'}])],
None, None, [self.cards], {}, [])]
)]
@@ -1569,10 +1561,7 @@
repo._type_source_cache[999998] = ('State', 'system', None)
self._test('INSERT Note X: X in_state S, X type "bla", X migrated_from N WHERE S eid %(s)s, N eid %(n)s',
[('InsertStep',
- [('RelationsStep',
- [('OneFetchStep', [('Any 999998,999999', [{}])],
- None, None, [self.system], {}, [])]
- )]
+ [('InsertRelationsStep', [])]
)],
{'n': 999999, 's': 999998})
@@ -1581,11 +1570,7 @@
repo._type_source_cache[999998] = ('State', 'system', None)
self._test('INSERT Note X: X in_state S, X type "bla", X migrated_from N WHERE S eid %(s)s, N eid %(n)s, A concerne N',
[('InsertStep',
- [('RelationsStep',
- [('OneFetchStep', [('Any 999998,999999 WHERE A concerne 999999, A is Affaire',
- [{'A': 'Affaire'}])],
- None, None, [self.system], {}, [])]
- )]
+ [('InsertRelationsStep', [])]
)],
{'n': 999999, 's': 999998})
@@ -1667,7 +1652,7 @@
# source, states should only be searched in the system source as well
self._test('SET X in_state S WHERE X eid %(x)s, S name "deactivated"',
[('UpdateStep', [
- ('OneFetchStep', [('DISTINCT Any 5,S WHERE S name "deactivated", S is State',
+ ('OneFetchStep', [('DISTINCT Any S WHERE S name "deactivated", S is State',
[{'S': 'State'}])],
None, None, [self.system], {}, []),
]),
@@ -1817,7 +1802,7 @@
[('FetchStep', [('Any Y WHERE Y multisource_rel 999998, Y is Note', [{'Y': 'Note'}])],
[self.cards], None, {'Y': u'table0.C0'}, []),
('UpdateStep',
- [('OneFetchStep', [('DISTINCT Any 999999,Y WHERE Y migrated_from 999998, Y is Note',
+ [('OneFetchStep', [('DISTINCT Any Y WHERE Y migrated_from 999998, Y is Note',
[{'Y': 'Note'}])],
None, None, [self.system],
{'Y': u'table0.C0'}, [])])],
@@ -1844,14 +1829,9 @@
def test_nonregr11(self):
repo._type_source_cache[999999] = ('Bookmark', 'system', 999999)
self._test('SET X bookmarked_by Y WHERE X eid %(x)s, Y login "hop"',
- [('FetchStep',
- [('Any Y WHERE Y login "hop", Y is CWUser', [{'Y': 'CWUser'}])],
- [self.ldap, self.system],
- None, {'Y': 'table0.C0'}, []),
- ('UpdateStep',
- [('OneFetchStep', [('DISTINCT Any 999999,Y WHERE Y is CWUser', [{'Y': 'CWUser'}])],
- None, None, [self.system], {'Y': 'table0.C0'},
- [])]
+ [('UpdateStep',
+ [('OneFetchStep', [('DISTINCT Any Y WHERE Y login "hop", Y is CWUser', [{'Y': 'CWUser'}])],
+ None, None, [self.ldap, self.system], {}, [])]
)],
{'x': 999999})
--- a/server/test/unittest_security.py Wed Mar 03 17:56:04 2010 +0100
+++ b/server/test/unittest_security.py Wed Mar 03 17:59:05 2010 +0100
@@ -436,8 +436,8 @@
rset = cu.execute('CWUser X WHERE X eid %(x)s', {'x': anon.eid}, 'x')
self.assertEquals(rset.rows, [[anon.eid]])
# but can't modify it
- cu.execute('SET X login "toto" WHERE X eid %(x)s', {'x': anon.eid})
- self.assertRaises(Unauthorized, cnx.commit)
+ self.assertRaises(Unauthorized,
+ cu.execute, 'SET X login "toto" WHERE X eid %(x)s', {'x': anon.eid})
def test_in_group_relation(self):
cnx = self.login('iaminusersgrouponly')