# copyright 2003-2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved.# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr## This file is part of CubicWeb.## CubicWeb is free software: you can redistribute it and/or modify it under the# terms of the GNU Lesser General Public License as published by the Free# Software Foundation, either version 2.1 of the License, or (at your option)# any later version.## CubicWeb is distributed in the hope that it will be useful, but WITHOUT# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more# details.## You should have received a copy of the GNU Lesser General Public License along# with CubicWeb. If not, see <http://www.gnu.org/licenses/>."""Helper classes to execute RQL queries on a set of sources, performingsecurity checking and data aggregation."""from__future__importprint_function__docformat__="restructuredtext en"fromitertoolsimportrepeatfromsiximporttext_type,string_types,integer_typesfromsix.movesimportrangefromrqlimportRQLSyntaxError,CoercionErrorfromrql.stmtsimportUnionfromrql.nodesimportETYPE_PYOBJ_MAP,etype_from_pyobj,Relation,Exists,NotfromyamsimportBASE_TYPESfromcubicwebimportValidationError,Unauthorized,UnknownEidfromcubicweb.rqlrewriteimportRQLRelationRewriterfromcubicwebimportBinary,serverfromcubicweb.rsetimportResultSetfromcubicweb.utilsimportQueryCache,RepeatListfromcubicweb.server.rqlannotationimportSQLGenAnnotator,set_qdatafromcubicweb.server.ssplannerimportREAD_ONLY_RTYPES,add_types_restrictionfromcubicweb.server.editionimportEditedEntityfromcubicweb.server.ssplannerimportSSPlannerfromcubicweb.statsd_loggerimportstatsd_timeit,statsd_cETYPE_PYOBJ_MAP[Binary]='Bytes'defempty_rset(rql,args,rqlst=None):"""build an empty result set object"""returnResultSet([],rql,args,rqlst=rqlst)defupdate_varmap(varmap,selected,table):"""return a sql schema to store RQL query result"""fori,terminenumerate(selected):key=term.as_string()value='%s.C%s'%(table,i)ifvarmap.get(key,value)!=value:raiseException('variable name conflict on %s: got %s / %s'%(key,value,varmap))varmap[key]=value# permission utilities ########################################################defcheck_no_password_selected(rqlst):"""check that Password entities are not selected"""forsolutioninrqlst.solutions:forvar,etypeinsolution.items():ifetype=='Password':raiseUnauthorized('Password selection is not allowed (%s)'%var)defterm_etype(cnx,term,solution,args):"""return the entity type for the given term (a VariableRef or a Constant node) """try:returnsolution[term.name]exceptAttributeError:returncnx.entity_metas(term.eval(args))['type']defcheck_relations_read_access(cnx,select,args):"""Raise :exc:`Unauthorized` if the given user doesn't have credentials to read relations used in the given syntax tree """# use `term_etype` since we've to deal with rewritten constants here,# when used as an external source by another repository.# XXX what about local read security w/ those rewritten constants...# XXX constants can also happen in some queries generated by req.find()DBG=(server.DEBUG&server.DBG_SEC)and'read'inserver._SECURITY_CAPSschema=cnx.repo.schemauser=cnx.userifselect.whereisnotNone:forrelinselect.where.iget_nodes(Relation):forsolutioninselect.solutions:# XXX has_text may have specific perm ?ifrel.r_typeinREAD_ONLY_RTYPES:continuerschema=schema.rschema(rel.r_type)ifrschema.final:eschema=schema.eschema(term_etype(cnx,rel.children[0],solution,args))rdef=eschema.rdef(rschema)else:rdef=rschema.rdef(term_etype(cnx,rel.children[0],solution,args),term_etype(cnx,rel.children[1].children[0],solution,args))ifnotuser.matching_groups(rdef.get_groups('read')):ifDBG:print('check_read_access: %s%s does not match %s'%(rdef,user.groups,rdef.get_groups('read')))# XXX rqlexpr not allowedraiseUnauthorized('read',rel.r_type)ifDBG:print('check_read_access: %s%s matches %s'%(rdef,user.groups,rdef.get_groups('read')))defget_local_checks(cnx,rqlst,solution):"""Check that the given user has credentials to access data read by the query and return a dict defining necessary "local checks" (i.e. rql expression in read permission defined in the schema) where no group grants him the permission. Returned dictionary's keys are variable names and values the rql expressions for this variable (with the given solution). Raise :exc:`Unauthorized` if access is known to be defined, i.e. if there is no matching group and no local permissions. """DBG=(server.DEBUG&server.DBG_SEC)and'read'inserver._SECURITY_CAPSschema=cnx.repo.schemauser=cnx.userlocalchecks={}# iterate on defined_vars and not on solutions to ignore column aliasesforvarnameinrqlst.defined_vars:eschema=schema.eschema(solution[varname])ifeschema.final:continueifnotuser.matching_groups(eschema.get_groups('read')):erqlexprs=eschema.get_rqlexprs('read')ifnoterqlexprs:ex=Unauthorized('read',solution[varname])ex.var=varnameifDBG:print('check_read_access: %s%s%s%s'%(varname,eschema,user.groups,eschema.get_groups('read')))raiseex# don't insert security on variable only referenced by 'NOT X relation Y' or# 'NOT EXISTS(X relation Y)'varinfo=rqlst.defined_vars[varname].stinfoifvarinfo['selected']or(len([rforrinvarinfo['relations']if(notschema.rschema(r.r_type).finaland((isinstance(r.parent,Exists)andr.parent.neged(strict=True))orisinstance(r.parent,Not)))])!=len(varinfo['relations'])):localchecks[varname]=erqlexprsreturnlocalchecks# Plans #######################################################################classExecutionPlan(object):"""the execution model of a rql query, composed of querier steps"""def__init__(self,querier,rqlst,args,cnx):# original rql syntax treeself.rqlst=rqlstself.args=argsor{}# cnx executing the queryself.cnx=cnx# quick reference to the system sourceself.syssource=cnx.repo.system_source# execution stepsself.steps=[]# various resource accesorsself.querier=querierself.schema=querier.schemaself.sqlannotate=querier.sqlgen_annotateself.rqlhelper=cnx.vreg.rqlhelperdefannotate_rqlst(self):ifnotself.rqlst.annotated:self.rqlhelper.annotate(self.rqlst)defadd_step(self,step):"""add a step to the plan"""self.steps.append(step)defsqlexec(self,sql,args=None):returnself.syssource.sqlexec(self.cnx,sql,args)defexecute(self):"""execute a plan and return resulting rows"""forstepinself.steps:result=step.execute()# the latest executed step contains the full query resultreturnresultdefpreprocess(self,union,security=True):"""insert security when necessary then annotate rql st for sql generation return rqlst to actually execute """cached=Noneifsecurityandself.cnx.read_security:# ensure security is turned of when security is inserted,# else we may loop for ever...ifself.cnx.transaction_data.get('security-rqlst-cache'):key=self.cache_keyelse:key=NoneifkeyisnotNoneandkeyinself.cnx.transaction_data:cachedunion,args=self.cnx.transaction_data[key]union.children[:]=[]forselectincachedunion.children:union.append(select)union.has_text_query=cachedunion.has_text_queryargs.update(self.args)self.args=argscached=Trueelse:withself.cnx.security_enabled(read=False):noinvariant=self._insert_security(union)ifkeyisnotNone:self.cnx.transaction_data[key]=(union,self.args)else:noinvariant=()ifcachedisNone:self.rqlhelper.simplify(union)self.sqlannotate(union)set_qdata(self.schema.rschema,union,noinvariant)ifunion.has_text_query:self.cache_key=Nonedef_insert_security(self,union):noinvariant=set()forselectinunion.children[:]:forsubqueryinselect.with_:self._insert_security(subquery.query)localchecks,restricted=self._check_permissions(select)ifany(localchecks):self.cnx.rql_rewriter.insert_local_checks(select,self.args,localchecks,restricted,noinvariant)returnnoinvariantdef_check_permissions(self,rqlst):"""Return a dict defining "local checks", i.e. RQLExpression defined in the schema that should be inserted in the original query, together with a set of variable names which requires some security to be inserted. Solutions where a variable has a type which the user can't definitly read are removed, else if the user *may* read it (i.e. if an rql expression is defined for the "read" permission of the related type), the local checks dict is updated. The local checks dict has entries for each different local check necessary, with associated solutions as value, a local check being defined by a list of 2-uple (variable name, rql expressions) for each variable which has to be checked. Solutions which don't require local checks will be associated to the empty tuple key. Note rqlst should not have been simplified at this point. """cnx=self.cnxmsgs=[]# dict(varname: eid), allowing to check rql expression for variables# which have a known eidvarkwargs={}ifnotcnx.transaction_data.get('security-rqlst-cache'):forvarinrqlst.defined_vars.values():ifvar.stinfo['constnode']isnotNone:eid=var.stinfo['constnode'].eval(self.args)varkwargs[var.name]=int(eid)# dictionary of variables restricted for security reasonlocalchecks={}restricted_vars=set()newsolutions=[]forsolutioninrqlst.solutions:try:localcheck=get_local_checks(cnx,rqlst,solution)exceptUnauthorizedasex:msg='remove %s from solutions since %s has no %s access to %s'msg%=(solution,cnx.user.login,ex.args[0],ex.args[1])msgs.append(msg)LOGGER.info(msg)else:newsolutions.append(solution)# try to benefit of rqlexpr.check cache for entities which# are specified by eid in query'argsforvarname,eidinvarkwargs.items():try:rqlexprs=localcheck.pop(varname)exceptKeyError:continue# if entity has been added in the current transaction, the# user can read it whatever rql expressions are associated# to its typeifcnx.added_in_transaction(eid):continueforrqlexprinrqlexprs:ifrqlexpr.check(cnx,eid):breakelse:raiseUnauthorized('No read acces on %r with eid %i.'%(var,eid))# mark variables protected by an rql expressionrestricted_vars.update(localcheck)# turn local check into a dict keylocalcheck=tuple(sorted(localcheck.items()))localchecks.setdefault(localcheck,[]).append(solution)# raise Unautorized exception if the user can't access to any solutionifnotnewsolutions:raiseUnauthorized('\n'.join(msgs))# if there is some message, solutions have been modified and must be# reconsidered by the syntax treeeifmsgs:rqlst.set_possible_types(newsolutions)returnlocalchecks,restricted_varsdeffinalize(self,select,solutions,insertedvars):rqlst=Union()rqlst.append(select)formainvarname,rschema,newvarnameininsertedvars:nvartype=str(rschema.objects(solutions[0][mainvarname])[0])forsolinsolutions:sol[newvarname]=nvartypeselect.clean_solutions(solutions)add_types_restriction(self.schema,select)self.rqlhelper.annotate(rqlst)self.preprocess(rqlst,security=False)returnrqlstclassInsertPlan(ExecutionPlan):"""an execution model specific to the INSERT rql query """def__init__(self,querier,rqlst,args,cnx):ExecutionPlan.__init__(self,querier,rqlst,args,cnx)# save originaly selected variable, we may modify this# dictionary for substitution (query parameters)self.selected=rqlst.selection# list of rows of entities definition (ssplanner.EditedEntity)self.e_defs=[[]]# list of new relation definition (3-uple (from_eid, r_type, to_eid)self.r_defs=set()# indexes to track entity definitions bound to relation definitionsself._r_subj_index={}self._r_obj_index={}self._expanded_r_defs={}defadd_entity_def(self,edef):"""add an entity definition to build"""self.e_defs[-1].append(edef)defadd_relation_def(self,rdef):"""add an relation definition to build"""self.r_defs.add(rdef)ifnotisinstance(rdef[0],int):self._r_subj_index.setdefault(rdef[0],[]).append(rdef)ifnotisinstance(rdef[2],int):self._r_obj_index.setdefault(rdef[2],[]).append(rdef)defsubstitute_entity_def(self,edef,edefs):"""substitute an incomplete entity definition by a list of complete equivalents e.g. on queries such as :: INSERT Personne X, Societe Y: X nom N, Y nom 'toto', X travaille Y WHERE U login 'admin', U login N X will be inserted as many times as U exists, and so the X travaille Y relations as to be added as many time as X is inserted """ifnotedefsornotself.e_defs:# no result, no entity will be createdself.e_defs=()return# first remove the incomplete entity definitioncolidx=self.e_defs[0].index(edef)fori,rowinenumerate(self.e_defs[:]):self.e_defs[i][colidx]=edefs[0]samplerow=self.e_defs[i]foredef_inedefs[1:]:row=[ed.clone()fori,edinenumerate(samplerow)ifi!=colidx]row.insert(colidx,edef_)self.e_defs.append(row)# now, see if this entity def is referenced as subject in some relation# definitionifedefinself._r_subj_index:forrdefinself._r_subj_index[edef]:expanded=self._expanded(rdef)result=[]forexp_rdefinexpanded:foredef_inedefs:result.append((edef_,exp_rdef[1],exp_rdef[2]))self._expanded_r_defs[rdef]=result# and finally, see if this entity def is referenced as object in some# relation definitionifedefinself._r_obj_index:forrdefinself._r_obj_index[edef]:expanded=self._expanded(rdef)result=[]forexp_rdefinexpanded:foredef_inedefs:result.append((exp_rdef[0],exp_rdef[1],edef_))self._expanded_r_defs[rdef]=resultdef_expanded(self,rdef):"""return expanded value for the given relation definition"""try:returnself._expanded_r_defs[rdef]exceptKeyError:self.r_defs.remove(rdef)return[rdef]defrelation_defs(self):"""return the list for relation definitions to insert"""forrdefsinself._expanded_r_defs.values():forrdefinrdefs:yieldrdefforrdefinself.r_defs:yieldrdefdefinsert_entity_defs(self):"""return eids of inserted entities in a suitable form for the resulting result set, e.g.: e.g. on queries such as :: INSERT Personne X, Societe Y: X nom N, Y nom 'toto', X travaille Y WHERE U login 'admin', U login N if there is two entities matching U, the result set will look like [(eidX1, eidY1), (eidX2, eidY2)] """cnx=self.cnxrepo=cnx.reporesults=[]forrowinself.e_defs:results.append([repo.glob_add_entity(cnx,edef)foredefinrow])returnresultsdefinsert_relation_defs(self):cnx=self.cnxrepo=cnx.repoedited_entities={}relations={}forsubj,rtype,objinself.relation_defs():# if a string is given into args instead of an int, we get it hereifisinstance(subj,string_types):subj=int(subj)elifnotisinstance(subj,integer_types):subj=subj.entity.eidifisinstance(obj,string_types):obj=int(obj)elifnotisinstance(obj,integer_types):obj=obj.entity.eidifrepo.schema.rschema(rtype).inlined:ifsubjnotinedited_entities:entity=cnx.entity_from_eid(subj)edited=EditedEntity(entity)edited_entities[subj]=editedelse:edited=edited_entities[subj]edited.edited_attribute(rtype,obj)else:ifrtypeinrelations:relations[rtype].append((subj,obj))else:relations[rtype]=[(subj,obj)]repo.glob_add_relations(cnx,relations)foreditedinedited_entities.values():repo.glob_update_entity(cnx,edited)classQuerierHelper(object):"""helper class to execute rql queries, putting all things together"""def__init__(self,repo,schema):# system info helperself._repo=repo# instance schemaself.set_schema(schema)defset_schema(self,schema):self.schema=schemarepo=self._repo# rql st and solution cache.self._rql_cache=QueryCache(repo.config['rql-cache-size'])# rql cache key cache. Don't bother using a Cache instance: we should# have a limited number of queries in there, since there are no entries# in this cache for user queries (which have no args)self._rql_ck_cache={}# some cache usage statsself.cache_hit,self.cache_miss=0,0# rql parsing / analysing helperself.solutions=repo.vreg.solutionsrqlhelper=repo.vreg.rqlhelper# set backend on the rql helper, will be used for function checkingrqlhelper.backend=repo.config.system_source_config['db-driver']self._parse=rqlhelper.parseself._annotate=rqlhelper.annotate# rql plannerself._planner=SSPlanner(schema,rqlhelper)# sql generation annotatorself.sqlgen_annotate=SQLGenAnnotator(schema).annotatedefparse(self,rql,annotate=False):"""return a rql syntax tree for the given rql"""try:returnself._parse(text_type(rql),annotate=annotate)exceptUnicodeError:raiseRQLSyntaxError(rql)defplan_factory(self,rqlst,args,cnx):"""create an execution plan for an INSERT RQL query"""ifrqlst.TYPE=='insert':returnInsertPlan(self,rqlst,args,cnx)returnExecutionPlan(self,rqlst,args,cnx)@statsd_timeitdefexecute(self,cnx,rql,args=None,build_descr=True):"""execute a rql query, return resulting rows and their description in a `ResultSet` object * `rql` should be a Unicode string or a plain ASCII string * `args` the optional parameters dictionary associated to the query * `build_descr` is a boolean flag indicating if the description should be built on select queries (if false, the description will be en empty list) on INSERT queries, there will be one row with the eid of each inserted entity result for DELETE and SET queries is undefined yet to maximize the rql parsing/analyzing cache performance, you should always use substitute arguments in queries (i.e. avoid query such as 'Any X WHERE X eid 123'!) """ifserver.DEBUG&(server.DBG_RQL|server.DBG_SQL):ifserver.DEBUG&(server.DBG_MORE|server.DBG_SQL):print('*'*80)print('querier input',repr(rql),repr(args))# parse the query and binds variablescachekey=(rql,)try:ifargs:# search for named args in query which are eids (hence# influencing query's solutions)eidkeys=self._rql_ck_cache[rql]ifeidkeys:# if there are some, we need a better cache key, eg (rql +# entity type of each eid)try:cachekey=self._repo.querier_cache_key(cnx,rql,args,eidkeys)exceptUnknownEid:# we want queries such as "Any X WHERE X eid 9999"# return an empty result instead of raising UnknownEidreturnempty_rset(rql,args)rqlst=self._rql_cache[cachekey]self.cache_hit+=1statsd_c('cache_hit')exceptKeyError:self.cache_miss+=1statsd_c('cache_miss')rqlst=self.parse(rql)try:# compute solutions for rqlst and return named args in query# which are eids. Notice that if you may not need `eidkeys`, we# have to compute solutions anyway (kept as annotation on the# tree)eidkeys=self.solutions(cnx,rqlst,args)exceptUnknownEid:# we want queries such as "Any X WHERE X eid 9999" return an# empty result instead of raising UnknownEidreturnempty_rset(rql,args)ifargsandrqlnotinself._rql_ck_cache:self._rql_ck_cache[rql]=eidkeysifeidkeys:cachekey=self._repo.querier_cache_key(cnx,rql,args,eidkeys)self._rql_cache[cachekey]=rqlstifrqlst.TYPE!='select':ifcnx.read_security:check_no_password_selected(rqlst)cachekey=Noneelse:ifcnx.read_security:forselectinrqlst.children:check_no_password_selected(select)check_relations_read_access(cnx,select,args)# on select query, always copy the cached rqlst so we don't have to# bother modifying it. This is not necessary on write queries since# a new syntax tree is built from them.rqlst=rqlst.copy()# Rewrite computed relationsrewriter=RQLRelationRewriter(cnx)rewriter.rewrite(rqlst,args)self._annotate(rqlst)ifargs:# different SQL generated when some argument is None or not (IS# NULL). This should be considered when computing sql cache keycachekey+=tuple(sorted([kfork,vinargs.items()ifvisNone]))# make an execution planplan=self.plan_factory(rqlst,args,cnx)plan.cache_key=cachekeyself._planner.build_plan(plan)# execute the plantry:results=plan.execute()except(Unauthorized,ValidationError):# getting an Unauthorized/ValidationError exception means the# transaction must be rolled back## notes:# * we should not reset the connections set here, since we don't want the# connection to loose it during processing# * don't rollback if we're in the commit process, will be handled# by the connectionifcnx.commit_stateisNone:cnx.commit_state='uncommitable'raise# build a description for the results if necessarydescr=()ifbuild_descr:ifrqlst.TYPE=='select':# sample selectioniflen(rqlst.children)==1andlen(rqlst.children[0].solutions)==1:# easy, all lines are identicalselected=rqlst.children[0].selectionsolution=rqlst.children[0].solutions[0]description=_make_description(selected,args,solution)descr=RepeatList(len(results),tuple(description))else:# hard, delegate the work :o)descr=manual_build_descr(cnx,rqlst,args,results)elifrqlst.TYPE=='insert':# on insert plan, some entities may have been auto-casted,# so compute description manually even if there is only# one solutionbasedescr=[None]*len(plan.selected)todetermine=list(zip(range(len(plan.selected)),repeat(False)))descr=_build_descr(cnx,results,basedescr,todetermine)# FIXME: get number of affected entities / relations on non# selection queries ?# return a result set objectreturnResultSet(results,rql,args,descr)# these are overridden by set_log_methods below# only defining here to prevent pylint from complaininginfo=warning=error=critical=exception=debug=lambdamsg,*a,**kw:NonefromloggingimportgetLoggerfromcubicwebimportset_log_methodsLOGGER=getLogger('cubicweb.querier')set_log_methods(QuerierHelper,LOGGER)defmanual_build_descr(cnx,rqlst,args,result):"""build a description for a given result by analysing each row XXX could probably be done more efficiently during execution of query """# not so easy, looks for variable which changes from one solution# to anotherunstables=rqlst.get_variable_indices()basedescr=[]todetermine=[]foriinrange(len(rqlst.children[0].selection)):ttype=_selection_idx_type(i,rqlst,args)ifttypeisNoneorttype=='Any':ttype=Noneisfinal=Trueelse:isfinal=ttypeinBASE_TYPESifttypeisNoneoriinunstables:basedescr.append(None)todetermine.append((i,isfinal))else:basedescr.append(ttype)ifnottodetermine:returnRepeatList(len(result),tuple(basedescr))return_build_descr(cnx,result,basedescr,todetermine)def_build_descr(cnx,result,basedescription,todetermine):description=[]entity_metas=cnx.entity_metastodel=[]fori,rowinenumerate(result):row_descr=basedescription[:]forindex,isfinalintodetermine:value=row[index]ifvalueisNone:# None value inserted by an outer join, no typerow_descr[index]=Nonecontinueifisfinal:row_descr[index]=etype_from_pyobj(value)else:try:row_descr[index]=entity_metas(value)['type']exceptUnknownEid:cnx.error('wrong eid %s in repository, you should ''db-check the database'%value)todel.append(i)breakelse:description.append(tuple(row_descr))foriinreversed(todel):delresult[i]returndescriptiondef_make_description(selected,args,solution):"""return a description for a result set"""description=[]forterminselected:description.append(term.get_type(solution,args))returndescriptiondef_selection_idx_type(i,rqlst,args):"""try to return type of term at index `i` of the rqlst's selection"""forselectinrqlst.children:term=select.selection[i]forsolutioninselect.solutions:try:ttype=term.get_type(solution,args)ifttypeisnotNone:returnttypeexceptCoercionError:returnNone