devtools/htmlparser.py
branch oldstable
changeset 7074 e4580e5f0703
parent 7014 7e3e80f4179a
child 8695 358d8bed9626
--- a/devtools/htmlparser.py	Fri Dec 10 12:17:18 2010 +0100
+++ b/devtools/htmlparser.py	Fri Mar 11 09:46:45 2011 +0100
@@ -15,16 +15,17 @@
 #
 # You should have received a copy of the GNU Lesser General Public License along
 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
-"""defines a validating HTML parser used in web application tests
-
-"""
+"""defines a validating HTML parser used in web application tests"""
 
 import re
 import sys
 
 from lxml import etree
 
+from logilab.common.deprecation import class_deprecated
+
 from cubicweb.view import STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE
+
 STRICT_DOCTYPE = str(STRICT_DOCTYPE)
 TRANSITIONAL_DOCTYPE = str(TRANSITIONAL_DOCTYPE)
 
@@ -51,10 +52,7 @@
     def __init__(self):
         Validator.__init__(self)
         # XXX understand what's happening under windows
-        validate = True
-        if sys.platform == 'win32':
-            validate = False
-        self.parser = etree.XMLParser(dtd_validation=validate)
+        self.parser = etree.XMLParser(dtd_validation=sys.platform != 'win32')
 
     def preprocess_data(self, data):
         """used to fix potential blockquote mess generated by docutils"""
@@ -87,12 +85,14 @@
         Validator.__init__(self)
         self.parser = etree.XMLParser()
 
+
 class XMLDemotingValidator(SaxOnlyValidator):
     """ some views produce html instead of xhtml, using demote_to_html
 
     this is typically related to the use of external dependencies
     which do not produce valid xhtml (google maps, ...)
     """
+    __metaclass__ = class_deprecated
 
     def preprocess_data(self, data):
         if data.startswith('<?xml'):
@@ -127,15 +127,46 @@
         self.input_tags = self.find_tag('input')
         self.title_tags = [self.h1_tags, self.h2_tags, self.h3_tags, self.h4_tags]
 
+    def _iterstr(self, tag):  # path expression for every descendant <tag>
+        if self.default_ns is None:
+            return ".//%s" % tag
+        else:
+            return ".//{%s}%s" % (self.default_ns, tag)  # {namespace}tag form
+
+    def matching_nodes(self, tag, **attrs):  # yield matching <tag> elements
+        for elt in self.etree.iterfind(self._iterstr(tag)):
+            eltattrs  = elt.attrib
+            for attr, value in attrs.iteritems():
+                try:
+                    if eltattrs[attr] != value:
+                        break  # attribute present but with another value
+                except KeyError:
+                    break  # attribute missing on this element
+            else: # no break above: every requested attribute matches
+                yield elt
+
+    def has_tag(self, tag, nboccurs=1, **attrs):
+        """returns True if tag with given attributes appears in the page
+        `nboccurs` times (any number of times, but at least once, if None)
+        """
+        for elt in self.matching_nodes(tag, **attrs):
+            if nboccurs is None: # no need to check number of occurrences
+                return True
+            if not nboccurs: # too many occurrences
+                return False
+            nboccurs -= 1
+        if nboccurs == 0: # correct number of occurrences
+            return True
+        return False # too few occurrences, or no matching tag/attrs at all
+
     def find_tag(self, tag, gettext=True):
         """return a list which contains text of all "tag" elements """
-        if self.default_ns is None:
-            iterstr = ".//%s" % tag
-        else:
-            iterstr = ".//{%s}%s" % (self.default_ns, tag)
+        iterstr = self._iterstr(tag)
         if not gettext or tag in ('a', 'input'):
-            return [(elt.text, elt.attrib) for elt in self.etree.iterfind(iterstr)]
-        return [u''.join(elt.xpath('.//text()')) for elt in self.etree.iterfind(iterstr)]
+            return [(elt.text, elt.attrib)  # (text, attributes) pairs
+                    for elt in self.etree.iterfind(iterstr)]
+        return [u''.join(elt.xpath('.//text()'))  # all nested text, joined
+                for elt in self.etree.iterfind(iterstr)]
 
     def appears(self, text):
         """returns True if <text> appears in the page"""