devtools/htmlparser.py
changeset 8979 8f5416b1562a
parent 8977 57e564c0118e
child 10006 8391bf718485
equal deleted inserted replaced
8978:269548f2306e 8979:8f5416b1562a
   105         """used to fix potential blockquote mess generated by docutils"""
   105         """used to fix potential blockquote mess generated by docutils"""
   106         if STRICT_DOCTYPE not in data:
   106         if STRICT_DOCTYPE not in data:
   107             return data
   107             return data
   108         # parse using transitional DTD
   108         # parse using transitional DTD
   109         data = data.replace(STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE)
   109         data = data.replace(STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE)
   110         tree = etree.fromstring(data, self.parser)
   110         tree = self._parse(data)
   111         namespace = tree.nsmap.get(None)
   111         namespace = tree.nsmap.get(None)
   112         # this is the list of authorized child tags for <blockquote> nodes
   112         # this is the list of authorized child tags for <blockquote> nodes
   113         expected = 'p h1 h2 h3 h4 h5 h6 div ul ol dl pre hr blockquote address ' \
   113         expected = 'p h1 h2 h3 h4 h5 h6 div ul ol dl pre hr blockquote address ' \
   114                    'fieldset table form noscript ins del script'.split()
   114                    'fieldset table form noscript ins del script'.split()
   115         if namespace:
   115         if namespace: