[skeleton i18n] Add PO-Revision-Date header to make emacs po-mode happy
Closes #3276389
# copyright 2003-2013 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
#
# This file is part of CubicWeb.
#
# CubicWeb is free software: you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 2.1 of the License, or (at your option)
# any later version.
#
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
"""defines a validating HTML parser used in web application tests"""

import re
import sys

from xml import sax
from cStringIO import StringIO

from lxml import etree

from logilab.common.deprecation import class_deprecated, class_renamed

from cubicweb.view import STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE

# the doctypes come from cubicweb.view as unicode; the lxml/sax machinery
# below wants plain str
STRICT_DOCTYPE = str(STRICT_DOCTYPE)
TRANSITIONAL_DOCTYPE = str(TRANSITIONAL_DOCTYPE)

ERR_COUNT = 0

_REM_SCRIPT_RGX = re.compile(r"<script[^>]*>.*?</script>",
                             re.U | re.M | re.I | re.S)


def _remove_script_tags(data):
    """Remove the script (usually javascript) tags to help the lxml
    XMLParser / HTMLParser do their job. Without that, they choke on
    tags embedded in JS strings.
    """
    # Notice we may want to use lxml cleaner, but it's far too intrusive:
    #
    # cleaner = Cleaner(scripts=True,
    #                   javascript=False,
    #                   comments=False,
    #                   style=False,
    #                   links=False,
    #                   meta=False,
    #                   page_structure=False,
    #                   processing_instructions=False,
    #                   embedded=False,
    #                   frames=False,
    #                   forms=False,
    #                   annoying_tags=False,
    #                   remove_tags=(),
    #                   remove_unknown_tags=False,
    #                   safe_attrs_only=False,
    #                   add_nofollow=False)
    # >>> cleaner.clean_html('<body></body>')
    # '<span></span>'
    # >>> cleaner.clean_html('<!DOCTYPE html><body></body>')
    # '<html><body></body></html>'
    # >>> cleaner.clean_html('<body><div/></body>')
    # '<div></div>'
    # >>> cleaner.clean_html('<html><body><div/><br></body><html>')
    # '<html><body><div></div><br></body></html>'
    # >>> cleaner.clean_html('<html><body><div/><br><span></body><html>')
    # '<html><body><div></div><br><span></span></body></html>'
    #
    # using that, we'll miss most actual validation error we want to
    # catch. For now, use dumb regexp
    return _REM_SCRIPT_RGX.sub('', data)


class Validator(object):
    """ base validator API """
    parser = None

    def parse_string(self, source):
        """parse and validate `source`, returning a :class:`PageInfo`
        wrapping the parsed tree

        :raises AssertionError: if the document is invalid
        """
        etree = self._parse(self.preprocess_data(source))
        return PageInfo(source, etree)

    def preprocess_data(self, data):
        """hook for subclasses to clean `data` up before parsing"""
        return data

    def _parse(self, pdata):
        """parse preprocessed data, turning parser errors into
        AssertionError (with a `position` attribute) for testlib
        """
        try:
            return etree.fromstring(pdata, self.parser)
        except etree.XMLSyntaxError as exc:
            def save_in(fname=''):
                # debugging helper, to be called from pdb: dump the
                # offending document to `fname`
                # (was `file(fname, 'w').write(data)`: NameError on `data`
                # and a leaked file handle)
                with open(fname, 'w') as f:
                    f.write(pdata)
            new_exc = AssertionError(u'invalid document: %s' % exc)
            new_exc.position = exc.position
            raise new_exc


class DTDValidator(Validator):
    def __init__(self):
        Validator.__init__(self)
        # XXX understand what's happening under windows
        self.parser = etree.XMLParser(dtd_validation=sys.platform != 'win32')

    def preprocess_data(self, data):
        """used to fix potential blockquote mess generated by docutils"""
        if STRICT_DOCTYPE not in data:
            return data
        # parse using transitional DTD
        data = data.replace(STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE)
        tree = self._parse(data)
        namespace = tree.nsmap.get(None)
        if namespace:
            blockquotes = tree.findall('.//{%s}blockquote' % namespace)
        else:
            blockquotes = tree.findall('.//blockquote')
        # quick and dirty approach: remove all blockquotes
        # (the list of child tags the DTD allows inside <blockquote> used to
        # be computed here, but it was never checked: dead code removed)
        for blockquote in blockquotes:
            parent = blockquote.getparent()
            parent.remove(blockquote)
        data = etree.tostring(tree)
        return '<?xml version="1.0" encoding="UTF-8"?>%s\n%s' % (
            STRICT_DOCTYPE, data)


class XMLValidator(Validator):
    """XML validator, checks that XML is well-formed and used XMLNS are
    defined"""

    def __init__(self):
        Validator.__init__(self)
        self.parser = etree.XMLParser()

SaxOnlyValidator = class_renamed('SaxOnlyValidator',
                                 XMLValidator,
                                 '[3.17] you should use the '
                                 'XMLValidator class instead')


class XMLSyntaxValidator(Validator):
    """XML syntax validator, check XML is well-formed"""

    class MySaxErrorHandler(sax.ErrorHandler):
        """override default handler to avoid choking because of unknown
        entity"""
        def fatalError(self, exception):
            # XXX check entity in htmlentitydefs
            if not str(exception).endswith('undefined entity'):
                raise exception
    _parser = sax.make_parser()
    _parser.setContentHandler(sax.handler.ContentHandler())
    _parser.setErrorHandler(MySaxErrorHandler())

    def __init__(self):
        super(XMLSyntaxValidator, self).__init__()
        # XMLParser() wants xml namespaces defined
        # XMLParser(recover=True) will accept almost anything
        #
        # -> use the later but preprocess will check xml well-formness using a
        #    dumb SAX parser
        self.parser = etree.XMLParser(recover=True)

    def preprocess_data(self, data):
        return _remove_script_tags(data)

    def _parse(self, data):
        inpsrc = sax.InputSource()
        inpsrc.setByteStream(StringIO(data))
        try:
            self._parser.parse(inpsrc)
        except sax.SAXParseException as exc:
            # `as` syntax for consistency with Validator._parse
            new_exc = AssertionError(u'invalid document: %s' % exc)
            new_exc.position = (exc._linenum, exc._colnum)
            raise new_exc
        return super(XMLSyntaxValidator, self)._parse(data)


class XMLDemotingValidator(XMLValidator):
    """ some views produce html instead of xhtml, using demote_to_html

    this is typically related to the use of external dependencies
    which do not produce valid xhtml (google maps, ...)
    """
    __metaclass__ = class_deprecated
    __deprecation_warning__ = '[3.10] this is now handled in testlib.py'

    def preprocess_data(self, data):
        if data.startswith('<?xml'):
            self.parser = etree.XMLParser()
        else:
            self.parser = etree.HTMLParser()
        return data


class HTMLValidator(Validator):

    def __init__(self):
        Validator.__init__(self)
        self.parser = etree.HTMLParser(recover=False)

    def preprocess_data(self, data):
        return _remove_script_tags(data)


class PageInfo(object):
    """holds various informations on the view's output"""
    def __init__(self, source, root):
        self.source = source
        self.etree = root
        self.raw_text = u''.join(root.xpath('//text()'))
        self.namespace = self.etree.nsmap
        self.default_ns = self.namespace.get(None)
        self.a_tags = self.find_tag('a')
        self.h1_tags = self.find_tag('h1')
        self.h2_tags = self.find_tag('h2')
        self.h3_tags = self.find_tag('h3')
        self.h4_tags = self.find_tag('h4')
        self.input_tags = self.find_tag('input')
        self.title_tags = [self.h1_tags, self.h2_tags,
                           self.h3_tags, self.h4_tags]

    def _iterstr(self, tag):
        """return an ElementPath expression for `tag`, qualified with the
        document's default namespace when there is one"""
        if self.default_ns is None:
            return ".//%s" % tag
        else:
            return ".//{%s}%s" % (self.default_ns, tag)

    def matching_nodes(self, tag, **attrs):
        """yield all `tag` elements whose attributes include all of `attrs`"""
        for elt in self.etree.iterfind(self._iterstr(tag)):
            eltattrs = elt.attrib
            for attr, value in attrs.iteritems():
                try:
                    if eltattrs[attr] != value:
                        break
                except KeyError:
                    break
            else:  # all attributes match
                yield elt

    def has_tag(self, tag, nboccurs=1, **attrs):
        """returns True if tag with given attributes appears in the page
        `nbtimes` (any if None)
        """
        for elt in self.matching_nodes(tag, **attrs):
            if nboccurs is None:  # no need to check number of occurences
                return True
            if not nboccurs:  # too much occurences
                return False
            nboccurs -= 1
        if nboccurs == 0:  # correct number of occurences
            return True
        return False  # no matching tag/attrs

    def find_tag(self, tag, gettext=True):
        """return a list which contains text of all "tag" elements

        for 'a' and 'input' tags (or when `gettext` is False), return
        (text, attributes) pairs instead
        """
        iterstr = self._iterstr(tag)
        if not gettext or tag in ('a', 'input'):
            return [(elt.text, elt.attrib)
                    for elt in self.etree.iterfind(iterstr)]
        return [u''.join(elt.xpath('.//text()'))
                for elt in self.etree.iterfind(iterstr)]

    def appears(self, text):
        """returns True if <text> appears in the page"""
        return text in self.raw_text

    def __contains__(self, text):
        return text in self.source

    def has_title(self, text, level=None):
        """returns True if <h?>text</h?>

        :param level: the title's level (1 for h1, 2 for h2, etc.)
        """
        if level is None:
            for hlist in self.title_tags:
                if text in hlist:
                    return True
            return False
        else:
            hlist = self.title_tags[level - 1]
            return text in hlist

    def has_title_regexp(self, pattern, level=None):
        """returns True if <h?>pattern</h?>"""
        sre = re.compile(pattern)
        if level is None:
            for hlist in self.title_tags:
                for title in hlist:
                    if sre.match(title):
                        return True
            return False
        else:
            hlist = self.title_tags[level - 1]
            for title in hlist:
                if sre.match(title):
                    return True
            return False

    def has_link(self, text, url=None):
        """returns True if <a href=url>text</a> was found in the page"""
        for link_text, attrs in self.a_tags:
            if text == link_text:
                if url is None:
                    return True
                try:
                    href = attrs['href']
                    if href == url:
                        return True
                except KeyError:
                    continue
        return False

    def has_link_regexp(self, pattern, url=None):
        """returns True if <a href=url>pattern</a> was found in the page"""
        sre = re.compile(pattern)
        for link_text, attrs in self.a_tags:
            if sre.match(link_text):
                if url is None:
                    return True
                try:
                    href = attrs['href']
                    if href == url:
                        return True
                except KeyError:
                    continue
        return False

VALMAP = {None: None,
          'dtd': DTDValidator,
          'xml': XMLValidator,
          'html': HTMLValidator,
          }