[htmlparser] store unaltered source in pageinfo.source
Do not store the preprocessed data, so that the original source is preserved.
Pass the unaltered initial input as the ``source`` argument instead.
This is very useful when parsing HTML, since in that case we need to drop
``<script>`` tags before parsing.
--- a/devtools/htmlparser.py Thu Apr 25 18:38:47 2013 +0200
+++ b/devtools/htmlparser.py Fri Apr 26 11:52:35 2013 +0200
@@ -35,8 +35,7 @@
def parse_string(self, data, sysid=None):
try:
- data = self.preprocess_data(data)
- return PageInfo(data, etree.fromstring(data, self.parser))
+ return PageInfo(self, data)
except etree.XMLSyntaxError as exc:
def save_in(fname=''):
file(fname, 'w').write(data)
@@ -119,8 +118,9 @@
class PageInfo(object):
"""holds various informations on the view's output"""
- def __init__(self, source, root):
+ def __init__(self, validator, source):
self.source = source
+ root = etree.fromstring(validator.preprocess_data(source), validator.parser)
self.etree = root
self.source = source
self.raw_text = u''.join(root.xpath('//text()'))