# HG changeset patch
# User Sylvain Thénault
# Date 1298018029 -3600
# Node ID e1605db1a9330ec8a28241f346a448a6411eb5b1
# Parent 7e402e92caa625e0d5860fb99c23d61e736fc26a
# Parent 7e3e80f4179a5d7a3fa19d18ba940b205357d113
backport stable

diff -r 7e402e92caa6 -r e1605db1a933 devtools/htmlparser.py
--- a/devtools/htmlparser.py	Fri Feb 18 08:04:10 2011 +0100
+++ b/devtools/htmlparser.py	Fri Feb 18 09:33:49 2011 +0100
@@ -127,15 +127,41 @@
         self.input_tags = self.find_tag('input')
         self.title_tags = [self.h1_tags, self.h2_tags, self.h3_tags, self.h4_tags]
 
-    def iterstr(self, tag):
+    def _iterstr(self, tag):
         if self.default_ns is None:
             return ".//%s" % tag
         else:
             return ".//{%s}%s" % (self.default_ns, tag)
 
+    def matching_nodes(self, tag, **attrs):
+        for elt in self.etree.iterfind(self._iterstr(tag)):
+            eltattrs = elt.attrib
+            for attr, value in attrs.iteritems():
+                try:
+                    if eltattrs[attr] != value:
+                        break
+                except KeyError:
+                    break
+            else: # all attributes match
+                yield elt
+
+    def has_tag(self, tag, nboccurs=1, **attrs):
+        """returns True if tag with given attributes appears in the page
+        `nbtimes` (any if None)
+        """
+        for elt in self.matching_nodes(tag, **attrs):
+            if nboccurs is None: # no need to check number of occurences
+                return True
+            if not nboccurs: # too much occurences
+                return False
+            nboccurs -= 1
+        if nboccurs == 0: # correct number of occurences
+            return True
+        return False # no matching tag/attrs
+
     def find_tag(self, tag, gettext=True):
         """return a list which contains text of all "tag" elements """
-        iterstr = self.iterstr(tag)
+        iterstr = self._iterstr(tag)
         if not gettext or tag in ('a', 'input'):
             return [(elt.text, elt.attrib)
                     for elt in self.etree.iterfind(iterstr)]
@@ -146,28 +172,6 @@
         """returns True if appears in the page"""
         return text in self.raw_text
 
-    def has_tag(self, tag, nboccurs=1, **attrs):
-        """returns True if tag with given attributes appears in the page
-        `nbtimes` (any if None)
-        """
-        for elt in self.etree.iterfind(self.iterstr(tag)):
-            eltattrs = elt.attrib
-            for attr, value in attrs.iteritems():
-                try:
-                    if eltattrs[attr] != value:
-                        break
-                except KeyError:
-                    break
-            else: # all attributes match
-                if nboccurs is None: # no need to check number of occurences
-                    return True
-                if not nboccurs: # too much occurences
-                    return False
-                nboccurs -= 1
-        if nboccurs == 0: # correct number of occurences
-            return True
-        return False # no matching tag/attrs
-
     def __contains__(self, text):
         return text in self.source
 