--- a/devtools/htmlparser.py Mon May 06 16:05:51 2013 +0200
+++ b/devtools/htmlparser.py Mon May 06 17:13:43 2013 +0200
@@ -32,10 +32,18 @@
ERR_COUNT = 0
class Validator(object):
+ parser = None
- def parse_string(self, data, sysid=None):
+ def parse_string(self, source):
+ etree = self._parse(self.preprocess_data(source))
+ return PageInfo(source, etree)
+
+ def preprocess_data(self, data):
+ return data
+
+ def _parse(self, pdata):
try:
- return PageInfo(self, data)
+ return etree.fromstring(pdata, self.parser)
except etree.XMLSyntaxError as exc:
def save_in(fname=''):
file(fname, 'w').write(data)
@@ -43,9 +51,6 @@
new_exc.position = exc.position
raise new_exc
- def preprocess_data(self, data):
- return data
-
class DTDValidator(Validator):
def __init__(self):
@@ -155,11 +160,9 @@
class PageInfo(object):
"""holds various informations on the view's output"""
- def __init__(self, validator, source):
+ def __init__(self, source, root):
self.source = source
- root = etree.fromstring(validator.preprocess_data(source), validator.parser)
self.etree = root
- self.source = source
self.raw_text = u''.join(root.xpath('//text()'))
self.namespace = self.etree.nsmap
self.default_ns = self.namespace.get(None)