30 TRANSITIONAL_DOCTYPE = str(TRANSITIONAL_DOCTYPE) |
30 TRANSITIONAL_DOCTYPE = str(TRANSITIONAL_DOCTYPE) |
31 |
31 |
32 ERR_COUNT = 0 |
32 ERR_COUNT = 0 |
33 |
33 |
34 class Validator(object): |
34 class Validator(object): |
35 |
35 parser = None |
36 def parse_string(self, data, sysid=None): |
36 |
|
37 def parse_string(self, source): |
|
38 etree = self._parse(self.preprocess_data(source)) |
|
39 return PageInfo(source, etree) |
|
40 |
|
41 def preprocess_data(self, data): |
|
42 return data |
|
43 |
|
44 def _parse(self, pdata): |
37 try: |
45 try: |
38 return PageInfo(self, data) |
46 return etree.fromstring(pdata, self.parser) |
39 except etree.XMLSyntaxError as exc: |
47 except etree.XMLSyntaxError as exc: |
40 def save_in(fname=''): |
48 def save_in(fname=''): |
41 file(fname, 'w').write(data) |
49 file(fname, 'w').write(data) |
42 new_exc = AssertionError(u'invalid document: %s' % exc) |
50 new_exc = AssertionError(u'invalid document: %s' % exc) |
43 new_exc.position = exc.position |
51 new_exc.position = exc.position |
44 raise new_exc |
52 raise new_exc |
45 |
|
46 def preprocess_data(self, data): |
|
47 return data |
|
48 |
53 |
49 |
54 |
50 class DTDValidator(Validator): |
55 class DTDValidator(Validator): |
51 def __init__(self): |
56 def __init__(self): |
52 Validator.__init__(self) |
57 Validator.__init__(self) |
153 return REM_SCRIPT_RGX.sub('', data) |
158 return REM_SCRIPT_RGX.sub('', data) |
154 |
159 |
155 |
160 |
156 class PageInfo(object): |
161 class PageInfo(object): |
157 """holds various informations on the view's output""" |
162 """holds various informations on the view's output""" |
158 def __init__(self, validator, source): |
163 def __init__(self, source, root): |
159 self.source = source |
164 self.source = source |
160 root = etree.fromstring(validator.preprocess_data(source), validator.parser) |
|
161 self.etree = root |
165 self.etree = root |
162 self.source = source |
|
163 self.raw_text = u''.join(root.xpath('//text()')) |
166 self.raw_text = u''.join(root.xpath('//text()')) |
164 self.namespace = self.etree.nsmap |
167 self.namespace = self.etree.nsmap |
165 self.default_ns = self.namespace.get(None) |
168 self.default_ns = self.namespace.get(None) |
166 self.a_tags = self.find_tag('a') |
169 self.a_tags = self.find_tag('a') |
167 self.h1_tags = self.find_tag('h1') |
170 self.h1_tags = self.find_tag('h1') |