[devtools] improve error message when postgresql tools are missing
By default in at least Debian, some pg tools are not present in the
PATH. Or they may not be installed. But the test tools expect them
to be in the PATH, and give an unhelpful 'No such file or directory'
backtrace if they're not found. To help devs using the pg tests we
improve the error message.
# copyright 2003-2013 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of CubicWeb.
#
# CubicWeb is free software: you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
"""defines a validating HTML parser used in web application tests"""

import re
import sys

from xml import sax
from cStringIO import StringIO

from lxml import etree

from logilab.common.deprecation import class_deprecated, class_renamed

from cubicweb.view import STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE

STRICT_DOCTYPE = str(STRICT_DOCTYPE)
TRANSITIONAL_DOCTYPE = str(TRANSITIONAL_DOCTYPE)

ERR_COUNT = 0

_REM_SCRIPT_RGX = re.compile(r"<script[^>]*>.*?</script>",
                             re.U | re.M | re.I | re.S)


def _remove_script_tags(data):
    """Remove the script (usually javascript) tags to help the lxml
    XMLParser / HTMLParser do their job. Without that, they choke on
    tags embedded in JS strings.
    """
    # Notice we may want to use lxml cleaner, but it's far too intrusive:
    #
    # cleaner = Cleaner(scripts=True,
    #                   javascript=False,
    #                   comments=False,
    #                   style=False,
    #                   links=False,
    #                   meta=False,
    #                   page_structure=False,
    #                   processing_instructions=False,
    #                   embedded=False,
    #                   frames=False,
    #                   forms=False,
    #                   annoying_tags=False,
    #                   remove_tags=(),
    #                   remove_unknown_tags=False,
    #                   safe_attrs_only=False,
    #                   add_nofollow=False)
    # >>> cleaner.clean_html('<body></body>')
    # '<span></span>'
    # >>> cleaner.clean_html('<!DOCTYPE html><body></body>')
    # '<html><body></body></html>'
    # >>> cleaner.clean_html('<body><div/></body>')
    # '<div></div>'
    # >>> cleaner.clean_html('<html><body><div/><br></body><html>')
    # '<html><body><div></div><br></body></html>'
    # >>> cleaner.clean_html('<html><body><div/><br><span></body><html>')
    # '<html><body><div></div><br><span></span></body></html>'
    #
    # using that, we'll miss most actual validation error we want to
    # catch. For now, use dumb regexp
    return _REM_SCRIPT_RGX.sub('', data)


class Validator(object):
    """ base validator API """
    # lxml parser instance used by _parse(); set by subclasses
    parser = None

    def parse_string(self, source):
        """parse the given string and return a PageInfo wrapping both the
        raw source and the parsed element tree
        """
        etree = self._parse(self.preprocess_data(source))
        return PageInfo(source, etree)

    def preprocess_data(self, data):
        """hook for subclasses to massage the data before parsing;
        default implementation returns it unchanged
        """
        return data

    def _parse(self, pdata):
        """parse preprocessed data with self.parser, turning lxml syntax
        errors into AssertionError (with a .position attribute) so test
        frameworks report them as failures
        """
        try:
            return etree.fromstring(pdata, self.parser)
        except etree.XMLSyntaxError as exc:
            def save_in(fname=''):
                # debugging helper: dump the offending document to a file
                # (fixed: original wrote undefined name `data`)
                open(fname, 'w').write(pdata)
            new_exc = AssertionError(u'invalid document: %s' % exc)
            new_exc.position = exc.position
            raise new_exc


class DTDValidator(Validator):
    """validator performing actual DTD validation of the document"""

    def __init__(self):
        Validator.__init__(self)
        # XXX understand what's happening under windows
        self.parser = etree.XMLParser(dtd_validation=sys.platform != 'win32')

    def preprocess_data(self, data):
        """used to fix potential blockquote mess generated by docutils"""
        if STRICT_DOCTYPE not in data:
            return data
        # parse using transitional DTD
        data = data.replace(STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE)
        tree = self._parse(data)
        namespace = tree.nsmap.get(None)
        # this is the list of authorized child tags for <blockquote> nodes
        expected = 'p h1 h2 h3 h4 h5 h6 div ul ol dl pre hr blockquote address ' \
                   'fieldset table form noscript ins del script'.split()
        if namespace:
            blockquotes = tree.findall('.//{%s}blockquote' % namespace)
            expected = ['{%s}%s' % (namespace, tag) for tag in expected]
        else:
            blockquotes = tree.findall('.//blockquote')
        # quick and dirty approach: remove all blockquotes
        for blockquote in blockquotes:
            parent = blockquote.getparent()
            parent.remove(blockquote)
        data = etree.tostring(tree)
        return '<?xml version="1.0" encoding="UTF-8"?>%s\n%s' % (
            STRICT_DOCTYPE, data)


class XMLValidator(Validator):
    """XML validator, checks that XML is well-formed and used XMLNS are defined"""

    def __init__(self):
        Validator.__init__(self)
        self.parser = etree.XMLParser()

SaxOnlyValidator = class_renamed('SaxOnlyValidator',
                                 XMLValidator,
                                 '[3.17] you should use the '
                                 'XMLValidator class instead')


class XMLSyntaxValidator(Validator):
    """XML syntax validator, check XML is well-formed"""

    class MySaxErrorHandler(sax.ErrorHandler):
        """override default handler to avoid choking because of unknown entity"""
        def fatalError(self, exception):
            # XXX check entity in htmlentitydefs
            if not str(exception).endswith('undefined entity'):
                raise exception

    # shared SAX parser used only to check well-formedness in _parse()
    _parser = sax.make_parser()
    _parser.setContentHandler(sax.handler.ContentHandler())
    _parser.setErrorHandler(MySaxErrorHandler())

    def __init__(self):
        super(XMLSyntaxValidator, self).__init__()
        # XMLParser() wants xml namespaces defined
        # XMLParser(recover=True) will accept almost anything
        #
        # -> use the later but preprocess will check xml well-formness using a
        #    dumb SAX parser
        self.parser = etree.XMLParser(recover=True)

    def preprocess_data(self, data):
        return _remove_script_tags(data)

    def _parse(self, data):
        """check well-formedness with the SAX parser, then delegate the
        actual (recovering) parse to the base implementation
        """
        inpsrc = sax.InputSource()
        inpsrc.setByteStream(StringIO(data))
        try:
            self._parser.parse(inpsrc)
        except sax.SAXParseException as exc:
            new_exc = AssertionError(u'invalid document: %s' % exc)
            new_exc.position = (exc._linenum, exc._colnum)
            raise new_exc
        return super(XMLSyntaxValidator, self)._parse(data)


class XMLDemotingValidator(XMLValidator):
    """ some views produce html instead of xhtml, using demote_to_html

    this is typically related to the use of external dependencies
    which do not produce valid xhtml (google maps, ...)
    """
    __metaclass__ = class_deprecated
    __deprecation_warning__ = '[3.10] this is now handled in testlib.py'

    def preprocess_data(self, data):
        # pick the XML or HTML parser depending on the document prolog
        if data.startswith('<?xml'):
            self.parser = etree.XMLParser()
        else:
            self.parser = etree.HTMLParser()
        return data


class HTMLValidator(Validator):
    """validator checking the document parses as strict HTML"""

    def __init__(self):
        Validator.__init__(self)
        self.parser = etree.HTMLParser(recover=False)

    def preprocess_data(self, data):
        return _remove_script_tags(data)


class PageInfo(object):
    """holds various informations on the view's output"""

    def __init__(self, source, root):
        self.source = source
        self.etree = root
        self.raw_text = u''.join(root.xpath('//text()'))
        self.namespace = self.etree.nsmap
        self.default_ns = self.namespace.get(None)
        self.a_tags = self.find_tag('a')
        self.h1_tags = self.find_tag('h1')
        self.h2_tags = self.find_tag('h2')
        self.h3_tags = self.find_tag('h3')
        self.h4_tags = self.find_tag('h4')
        self.input_tags = self.find_tag('input')
        self.title_tags = [self.h1_tags, self.h2_tags,
                           self.h3_tags, self.h4_tags]

    def _iterstr(self, tag):
        """return an ElementPath expression matching `tag` anywhere in the
        tree, qualified with the default namespace if the document has one
        """
        if self.default_ns is None:
            return ".//%s" % tag
        else:
            return ".//{%s}%s" % (self.default_ns, tag)

    def matching_nodes(self, tag, **attrs):
        """yield elements named `tag` whose attributes match all of `attrs`"""
        for elt in self.etree.iterfind(self._iterstr(tag)):
            eltattrs = elt.attrib
            for attr, value in attrs.iteritems():
                try:
                    if eltattrs[attr] != value:
                        break
                except KeyError:
                    break
            else:  # all attributes match
                yield elt

    def has_tag(self, tag, nboccurs=1, **attrs):
        """returns True if tag with given attributes appears in the page
        `nboccurs` times (any if None)
        """
        for elt in self.matching_nodes(tag, **attrs):
            if nboccurs is None:  # no need to check number of occurences
                return True
            if not nboccurs:  # too much occurences
                return False
            nboccurs -= 1
        if nboccurs == 0:  # correct number of occurences
            return True
        return False  # no matching tag/attrs

    def find_tag(self, tag, gettext=True):
        """return a list which contains text of all "tag" elements """
        iterstr = self._iterstr(tag)
        if not gettext or tag in ('a', 'input'):
            # for links and inputs, keep attributes alongside the text
            return [(elt.text, elt.attrib)
                    for elt in self.etree.iterfind(iterstr)]
        return [u''.join(elt.xpath('.//text()'))
                for elt in self.etree.iterfind(iterstr)]

    def appears(self, text):
        """returns True if <text> appears in the page"""
        return text in self.raw_text

    def __contains__(self, text):
        return text in self.source

    def has_title(self, text, level=None):
        """returns True if <h?>text</h?>

        :param level: the title's level (1 for h1, 2 for h2, etc.)
        """
        if level is None:
            for hlist in self.title_tags:
                if text in hlist:
                    return True
            return False
        else:
            hlist = self.title_tags[level - 1]
            return text in hlist

    def has_title_regexp(self, pattern, level=None):
        """returns True if <h?>pattern</h?>"""
        sre = re.compile(pattern)
        if level is None:
            for hlist in self.title_tags:
                for title in hlist:
                    if sre.match(title):
                        return True
            return False
        else:
            hlist = self.title_tags[level - 1]
            for title in hlist:
                if sre.match(title):
                    return True
            return False

    def has_link(self, text, url=None):
        """returns True if <a href=url>text</a> was found in the page"""
        for link_text, attrs in self.a_tags:
            if text == link_text:
                if url is None:
                    return True
                try:
                    href = attrs['href']
                    if href == url:
                        return True
                except KeyError:
                    continue
        return False

    def has_link_regexp(self, pattern, url=None):
        """returns True if <a href=url>pattern</a> was found in the page"""
        sre = re.compile(pattern)
        for link_text, attrs in self.a_tags:
            if sre.match(link_text):
                if url is None:
                    return True
                try:
                    href = attrs['href']
                    if href == url:
                        return True
                except KeyError:
                    continue
        return False

# maps the content-type keyword used by tests to the validator class
VALMAP = {None: None,
          'dtd': DTDValidator,
          'xml': XMLValidator,
          'html': HTMLValidator,
          }