# HG changeset patch
# User Sylvain Thénault
# Date 1243359456 -7200
# Node ID 2b59d9ae17ae6d730c503d80f68294910578e28a
# Parent a1b1d4f8482c2f22352f27890e102ae12ac8df85
new argument telling if we want text or (text / attrs), keeping bw compat

diff -r a1b1d4f8482c -r 2b59d9ae17ae devtools/htmlparser.py
--- a/devtools/htmlparser.py Tue May 26 19:36:40 2009 +0200
+++ b/devtools/htmlparser.py Tue May 26 19:37:36 2009 +0200
@@ -88,13 +88,13 @@
         self.input_tags = self.find_tag('input')
         self.title_tags = [self.h1_tags, self.h2_tags, self.h3_tags, self.h4_tags]
 
-    def find_tag(self, tag):
+    def find_tag(self, tag, gettext=True):
         """return a list which contains text of all "tag" elements
         """
         if self.default_ns is None:
             iterstr = ".//%s" % tag
         else:
             iterstr = ".//{%s}%s" % (self.default_ns, tag)
-        if tag in ('a', 'input'):
+        if not gettext or tag in ('a', 'input'):
             return [(elt.text, elt.attrib) for elt in self.etree.iterfind(iterstr)]
         return [u''.join(elt.xpath('.//text()')) for elt in self.etree.iterfind(iterstr)]