86 self.h3_tags = self.find_tag('h3') |
86 self.h3_tags = self.find_tag('h3') |
87 self.h4_tags = self.find_tag('h4') |
87 self.h4_tags = self.find_tag('h4') |
88 self.input_tags = self.find_tag('input') |
88 self.input_tags = self.find_tag('input') |
89 self.title_tags = [self.h1_tags, self.h2_tags, self.h3_tags, self.h4_tags] |
89 self.title_tags = [self.h1_tags, self.h2_tags, self.h3_tags, self.h4_tags] |
90 |
90 |
def find_tag(self, tag, gettext=True):
    """Return the content of every ``tag`` element found in ``self.etree``.

    The search path is ``.//<tag>``; when ``self.default_ns`` is not
    None the tag is qualified as ``.//{<default_ns>}<tag>``.

    :param tag: local name of the elements to look for (e.g. ``'h1'``).
    :param gettext: when true (the default), return the text of each
        element; when false, return ``(text, attrib)`` pairs instead.
        ``'a'`` and ``'input'`` tags always yield ``(text, attrib)``
        pairs, whatever the value of ``gettext``.
    :return: a list with one entry per matching element, either

        * a string: the results of the element's ``xpath('.//text()')``
          query joined together, or
        * a ``(elt.text, elt.attrib)`` tuple.
    """
    # NOTE(review): elements must provide .xpath(), so self.etree is
    # presumably an lxml tree -- confirm against the code that builds it.
    if self.default_ns is None:
        iterstr = ".//%s" % tag
    else:
        iterstr = ".//{%s}%s" % (self.default_ns, tag)
    elements = self.etree.iterfind(iterstr)
    if not gettext or tag in ('a', 'input'):
        # Callers of these need the attributes (href, name, value, ...),
        # not just the text.
        return [(elt.text, elt.attrib) for elt in elements]
    # Join every text result so that text inside nested markup is kept.
    return [u''.join(elt.xpath('.//text()')) for elt in elements]
100 |
100 |
101 def appears(self, text): |
101 def appears(self, text): |
102 """returns True if <text> appears in the page""" |
102 """returns True if <text> appears in the page""" |