devtools/htmlparser.py
branch stable
changeset 7014 7e3e80f4179a
parent 6977 cb78108bf603
child 8695 358d8bed9626
--- a/devtools/htmlparser.py	Thu Feb 17 18:43:45 2011 +0100
+++ b/devtools/htmlparser.py	Fri Feb 18 09:33:32 2011 +0100
@@ -127,15 +127,41 @@
         self.input_tags = self.find_tag('input')
         self.title_tags = [self.h1_tags, self.h2_tags, self.h3_tags, self.h4_tags]
 
-    def iterstr(self, tag):
+    def _iterstr(self, tag):
         if self.default_ns is None:
             return ".//%s" % tag
         else:
             return ".//{%s}%s" % (self.default_ns, tag)
 
+    def matching_nodes(self, tag, **attrs):
+        for elt in self.etree.iterfind(self._iterstr(tag)):
+            eltattrs  = elt.attrib
+            for attr, value in attrs.iteritems():
+                try:
+                    if eltattrs[attr] != value:
+                        break
+                except KeyError:
+                    break
+            else: # all attributes match
+                yield elt
+
+    def has_tag(self, tag, nboccurs=1, **attrs):
+        """returns True if tag with given attributes appears in the page
+        `nboccurs` times (any number of times if None)
+        """
+        for elt in self.matching_nodes(tag, **attrs):
+            if nboccurs is None: # no need to check number of occurrences
+                return True
+            if not nboccurs: # too many occurrences
+                return False
+            nboccurs -= 1
+        if nboccurs == 0: # correct number of occurrences
+            return True
+        return False # no matching tag/attrs
+
     def find_tag(self, tag, gettext=True):
         """return a list which contains text of all "tag" elements """
-        iterstr = self.iterstr(tag)
+        iterstr = self._iterstr(tag)
         if not gettext or tag in ('a', 'input'):
             return [(elt.text, elt.attrib)
                     for elt in self.etree.iterfind(iterstr)]
@@ -146,28 +172,6 @@
         """returns True if <text> appears in the page"""
         return text in self.raw_text
 
-    def has_tag(self, tag, nboccurs=1, **attrs):
-        """returns True if tag with given attributes appears in the page
-        `nbtimes` (any if None)
-        """
-        for elt in self.etree.iterfind(self.iterstr(tag)):
-            eltattrs  = elt.attrib
-            for attr, value in attrs.iteritems():
-                try:
-                    if eltattrs[attr] != value:
-                        break
-                except KeyError:
-                    break
-            else: # all attributes match
-                if nboccurs is None: # no need to check number of occurences
-                    return True
-                if not nboccurs: # too much occurences
-                    return False
-                nboccurs -= 1
-        if nboccurs == 0: # correct number of occurences
-            return True
-        return False # no matching tag/attrs
-
     def __contains__(self, text):
         return text in self.source