devtools/htmlparser.py
branch tls-sprint
changeset 1132 96752791c2b6
parent 781 323656dd85a9
child 1421 77ee26df178f
equal deleted inserted replaced
1131:544609e83317 1132:96752791c2b6
     1 """defines a validating HTML parser used in web application tests"""
     1 """defines a validating HTML parser used in web application tests"""
     2 
     2 
     3 import re
     3 import re
     4 from StringIO import StringIO
       
     5 
     4 
     6 from lxml import etree
     5 from lxml import etree
     7 from lxml.builder import E
       
     8 
     6 
     9 from cubicweb.view import STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE, CW_XHTML_EXTENSIONS
     7 from cubicweb.view import STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE, CW_XHTML_EXTENSIONS
    10 
     8 
    11 STRICT_DOCTYPE = str(STRICT_DOCTYPE % CW_XHTML_EXTENSIONS).strip()
     9 STRICT_DOCTYPE = str(STRICT_DOCTYPE % CW_XHTML_EXTENSIONS).strip()
    12 TRANSITIONAL_DOCTYPE = str(TRANSITIONAL_DOCTYPE % CW_XHTML_EXTENSIONS).strip()
    10 TRANSITIONAL_DOCTYPE = str(TRANSITIONAL_DOCTYPE % CW_XHTML_EXTENSIONS).strip()
    53             blockquotes = tree.findall('.//blockquote')
    51             blockquotes = tree.findall('.//blockquote')
    54         # quick and dirty approach: remove all blockquotes
    52         # quick and dirty approach: remove all blockquotes
    55         for blockquote in blockquotes:
    53         for blockquote in blockquotes:
    56             parent = blockquote.getparent()
    54             parent = blockquote.getparent()
    57             parent.remove(blockquote)
    55             parent.remove(blockquote)
    58 ##         # for each blockquote, wrap unauthorized child in a div
       
    59 ##         for blockquote in blockquotes:
       
    60 ##             if len(blockquote):
       
    61 ##                 needs_wrap = [(index, child) for index, child in enumerate(blockquote)
       
    62 ##                               if child.tag not in expected]
       
    63 ##                 for index, child in needs_wrap:
       
    64 ##                     # the child is automatically popped from blockquote when
       
    65 ##                     # its parent is changed
       
    66 ##                     div = E.div(child)
       
    67 ##                     blockquote.insert(index, div)
       
    68 ##             elif blockquote.text:
       
    69 ##                 div = E.div(blockquote.text)
       
    70 ##                 blockquote.text = None
       
    71 ##                 blockquote.append(div)
       
    72         data = etree.tostring(tree)
    56         data = etree.tostring(tree)
    73         return '<?xml version="1.0" encoding="UTF-8"?>%s\n%s' % (STRICT_DOCTYPE, data)
    57         return '<?xml version="1.0" encoding="UTF-8"?>%s\n%s' % (STRICT_DOCTYPE, data)
    74 
    58 
    75    
    59    
    76 class SaxOnlyValidator(Validator):
    60 class SaxOnlyValidator(Validator):