--- a/devtools/htmlparser.py Fri Dec 10 12:17:18 2010 +0100
+++ b/devtools/htmlparser.py Fri Mar 11 09:46:45 2011 +0100
@@ -15,16 +15,17 @@
#
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
-"""defines a validating HTML parser used in web application tests
-
-"""
+"""defines a validating HTML parser used in web application tests"""
import re
import sys
from lxml import etree
+from logilab.common.deprecation import class_deprecated
+
from cubicweb.view import STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE
+
STRICT_DOCTYPE = str(STRICT_DOCTYPE)
TRANSITIONAL_DOCTYPE = str(TRANSITIONAL_DOCTYPE)
@@ -51,10 +52,7 @@
def __init__(self):
Validator.__init__(self)
# XXX understand what's happening under windows
- validate = True
- if sys.platform == 'win32':
- validate = False
- self.parser = etree.XMLParser(dtd_validation=validate)
+ self.parser = etree.XMLParser(dtd_validation=sys.platform != 'win32')
def preprocess_data(self, data):
"""used to fix potential blockquote mess generated by docutils"""
@@ -87,12 +85,14 @@
Validator.__init__(self)
self.parser = etree.XMLParser()
+
class XMLDemotingValidator(SaxOnlyValidator):
""" some views produce html instead of xhtml, using demote_to_html
this is typically related to the use of external dependencies
which do not produce valid xhtml (google maps, ...)
"""
+ __metaclass__ = class_deprecated
def preprocess_data(self, data):
if data.startswith('<?xml'):
@@ -127,15 +127,46 @@
self.input_tags = self.find_tag('input')
self.title_tags = [self.h1_tags, self.h2_tags, self.h3_tags, self.h4_tags]
+    def _iterstr(self, tag):
+        """return the `iterfind` path for `tag`, using the default ns if set"""
+        if self.default_ns is not None:
+            return ".//{%s}%s" % (self.default_ns, tag)
+        return ".//%s" % tag
+
+    def matching_nodes(self, tag, **attrs):
+        """yield `tag` elements whose attributes all equal those in `attrs`"""
+        for node in self.etree.iterfind(self._iterstr(tag)):
+            nodeattrs = node.attrib
+            for name, expected in attrs.iteritems():
+                # a missing attribute rules the element out just like a
+                # differing value does
+                if name not in nodeattrs or nodeattrs[name] != expected:
+                    break
+            else: # no attribute ruled the element out
+                yield node
+
+    def has_tag(self, tag, nboccurs=1, **attrs):
+        """return True if `tag` with the given attributes appears exactly
+        `nboccurs` times in the page (at least once if `nboccurs` is None)
+        """
+        for elt in self.matching_nodes(tag, **attrs):
+            if nboccurs is None: # any number of occurrences is fine
+                return True
+            if not nboccurs: # one match more than requested: too many
+                return False
+            nboccurs -= 1
+        # decide only once every match has been counted, so that extra
+        # occurrences are detected (and `nboccurs=0` means "absent")
+        return nboccurs == 0
+
def find_tag(self, tag, gettext=True):
"""return a list which contains text of all "tag" elements """
- if self.default_ns is None:
- iterstr = ".//%s" % tag
- else:
- iterstr = ".//{%s}%s" % (self.default_ns, tag)
+ iterstr = self._iterstr(tag)
if not gettext or tag in ('a', 'input'):
- return [(elt.text, elt.attrib) for elt in self.etree.iterfind(iterstr)]
- return [u''.join(elt.xpath('.//text()')) for elt in self.etree.iterfind(iterstr)]
+ return [(elt.text, elt.attrib)
+ for elt in self.etree.iterfind(iterstr)]
+ return [u''.join(elt.xpath('.//text()'))
+ for elt in self.etree.iterfind(iterstr)]
def appears(self, text):
"""returns True if <text> appears in the page"""