125 self.h3_tags = self.find_tag('h3') |
125 self.h3_tags = self.find_tag('h3') |
126 self.h4_tags = self.find_tag('h4') |
126 self.h4_tags = self.find_tag('h4') |
127 self.input_tags = self.find_tag('input') |
127 self.input_tags = self.find_tag('input') |
128 self.title_tags = [self.h1_tags, self.h2_tags, self.h3_tags, self.h4_tags] |
128 self.title_tags = [self.h1_tags, self.h2_tags, self.h3_tags, self.h4_tags] |
129 |
129 |
|
def iterstr(self, tag):
    """Return the path expression selecting every descendant `tag` element.

    :param tag: local name of the element to look for (e.g. ``'h1'``)
    :return: ``".//tag"`` when ``self.default_ns`` is None, otherwise
      ``".//{namespace}tag"`` so the tag is qualified with the default
      namespace
    """
    if self.default_ns is None:
        return ".//%s" % tag
    # elements live in a default namespace: the path must use the
    # ``{namespace}tag`` notation to match them
    return ".//{%s}%s" % (self.default_ns, tag)
|
135 |
def find_tag(self, tag, gettext=True):
    """Return a list describing every `tag` element found in the page.

    :param tag: local name of the elements to collect
    :param gettext: when true (the default), return the text of each
      element; ignored for ``'a'`` and ``'input'`` tags, whose attributes
      are always returned
    :return: a list of ``(text, attributes)`` pairs when `gettext` is false
      or `tag` is ``'a'`` / ``'input'``; otherwise a list of unicode
      strings, each being the concatenation of all text nodes found under
      the element
    """
    path = self.iterstr(tag)
    if not gettext or tag in ('a', 'input'):
        # for links and inputs the attributes (href, name, value...) are
        # what matters, so they are returned along with the direct text
        return [(elt.text, elt.attrib)
                for elt in self.etree.iterfind(path)]
    # join every text node below the element, nested markup included
    return [u''.join(elt.xpath('.//text()'))
            for elt in self.etree.iterfind(path)]
139 |
144 |
def appears(self, text):
    """Return True if `text` is found in the page's ``raw_text``.

    :param text: value looked up with the ``in`` operator
    """
    return text in self.raw_text
|
148 |
|
def has_tag(self, tag, nboccurs=1, **attrs):
    """Return True if `tag` with the given attributes appears in the page
    exactly `nboccurs` times (any number of times, at least once, if None).

    :param tag: local name of the elements to inspect
    :param nboccurs: expected number of matching elements, or None to
      accept any non-zero number of matches
    :param attrs: attribute name / value pairs an element must all carry
      to be counted as a match; with no pairs every `tag` element matches
    :return: True when the number of matching elements is the expected
      one, False otherwise
    """
    for elt in self.etree.iterfind(self.iterstr(tag)):
        eltattrs = elt.attrib
        # ``items()`` rather than ``iteritems()``: works on Python 2 and 3
        for attr, value in attrs.items():
            try:
                if eltattrs[attr] != value:
                    break
            except KeyError:
                # attribute missing on this element: not a match
                break
        else:  # all attributes match
            if nboccurs is None:  # no need to check number of occurrences
                return True
            if not nboccurs:  # too many occurrences
                return False
            nboccurs -= 1
    # the whole page has been scanned: the count is right only if every
    # expected occurrence has been consumed (None means nothing matched)
    if nboccurs == 0:  # correct number of occurrences
        return True
    return False  # no matching tag/attrs, or too few of them
143 |
170 |
def __contains__(self, text):
    """Support ``text in page``: True if `text` is found in ``self.source``."""
    return text in self.source
146 |
173 |
147 def has_title(self, text, level=None): |
174 def has_title(self, text, level=None): |