devtools/htmlparser.py
changeset 8939 30ce8eccfe3f
parent 8938 198fdadafed6
child 8940 ae898a084da2
equal deleted inserted replaced
8938:198fdadafed6 8939:30ce8eccfe3f
    33 
    33 
    34 class Validator(object):
    34 class Validator(object):
    35 
    35 
    36     def parse_string(self, data, sysid=None):
    36     def parse_string(self, data, sysid=None):
    37         try:
    37         try:
    38             data = self.preprocess_data(data)
    38             return PageInfo(self, data)
    39             return PageInfo(data, etree.fromstring(data, self.parser))
       
    40         except etree.XMLSyntaxError as exc:
    39         except etree.XMLSyntaxError as exc:
    41             def save_in(fname=''):
    40             def save_in(fname=''):
    42                 file(fname, 'w').write(data)
    41                 file(fname, 'w').write(data)
    43             new_exc = AssertionError(u'invalid xml %s' % exc)
    42             new_exc = AssertionError(u'invalid xml %s' % exc)
    44             new_exc.position = exc.position
    43             new_exc.position = exc.position
   117 
   116 
   118 
   117 
   119 
   118 
   120 class PageInfo(object):
   119 class PageInfo(object):
   121     """holds various informations on the view's output"""
   120     """holds various informations on the view's output"""
   122     def __init__(self, source, root):
   121     def __init__(self, validator, source):
   123         self.source = source
   122         self.source = source
       
   123         root = etree.fromstring(validator.preprocess_data(source), validator.parser)
   124         self.etree = root
   124         self.etree = root
   125         self.source = source
   125         self.source = source
   126         self.raw_text = u''.join(root.xpath('//text()'))
   126         self.raw_text = u''.join(root.xpath('//text()'))
   127         self.namespace = self.etree.nsmap
   127         self.namespace = self.etree.nsmap
   128         self.default_ns = self.namespace.get(None)
   128         self.default_ns = self.namespace.get(None)