125 self.h3_tags = self.find_tag('h3') |
125 self.h3_tags = self.find_tag('h3') |
126 self.h4_tags = self.find_tag('h4') |
126 self.h4_tags = self.find_tag('h4') |
127 self.input_tags = self.find_tag('input') |
127 self.input_tags = self.find_tag('input') |
128 self.title_tags = [self.h1_tags, self.h2_tags, self.h3_tags, self.h4_tags] |
128 self.title_tags = [self.h1_tags, self.h2_tags, self.h3_tags, self.h4_tags] |
129 |
129 |
def _iterstr(self, tag):
    """Return the search path that matches every `tag` element.

    The path is ``.//tag`` when ``self.default_ns`` is None; otherwise the
    tag is qualified with the namespace as ``.//{default_ns}tag``.
    """
    if self.default_ns is None:
        return ".//%s" % tag
    return ".//{%s}%s" % (self.default_ns, tag)

# Both spellings are called in this file; keep the former public name working.
iterstr = _iterstr
135 |
135 |
def matching_nodes(self, tag, **attrs):
    """Yield each `tag` element whose attributes match `attrs`.

    An element matches when, for every ``name=value`` pair in `attrs`, its
    ``attrib`` mapping contains ``name`` with exactly that value. With no
    `attrs`, every `tag` element is yielded. Elements are produced lazily,
    in the order ``self.etree.iterfind`` returns them.
    """
    for elt in self.etree.iterfind(self._iterstr(tag)):
        eltattrs = elt.attrib
        # items() works on Python 2 and 3 alike (iteritems() is Python 2 only)
        if all(attr in eltattrs and eltattrs[attr] == value
               for attr, value in attrs.items()):
            yield elt
def has_tag(self, tag, nboccurs=1, **attrs):
    """Return True if `tag` with the given attributes appears in the page.

    `nboccurs` is the exact number of matching elements required. Pass
    None to accept any number of occurrences, as long as there is at
    least one. Matching is delegated to ``self.matching_nodes``.
    """
    remaining = nboccurs
    for _ in self.matching_nodes(tag, **attrs):
        if remaining is None:
            # any number of occurrences is fine: the first match settles it
            return True
        if not remaining:
            # one match more than requested: too many occurrences
            return False
        remaining -= 1
    # True only when exactly `nboccurs` matches were seen; False when there
    # were too few, or when `nboccurs` is None and nothing matched
    return remaining == 0
|
161 |
|
def find_tag(self, tag, gettext=True):
    """Return a list describing every `tag` element of the page.

    By default each item is the concatenated text found under the element
    (the result of its ``.//text()`` XPath query, so elements are expected
    to provide ``xpath``). When `gettext` is false, or when `tag` is ``'a'``
    or ``'input'`` regardless of `gettext`, each item is instead an
    ``(elt.text, elt.attrib)`` tuple.
    """
    elements = self.etree.iterfind(self._iterstr(tag))
    if not gettext or tag in ('a', 'input'):
        return [(elt.text, elt.attrib) for elt in elements]
    return [u''.join(elt.xpath('.//text()')) for elt in elements]
|
170 |
|
def appears(self, text):
    """Return True if `text` occurs in ``self.raw_text``."""
    return text in self.raw_text
170 |
174 |
def __contains__(self, text):
    """Support ``text in page``: True if `text` occurs in ``self.source``."""
    return text in self.source
173 |
177 |
174 def has_title(self, text, level=None): |
178 def has_title(self, text, level=None): |