1 """defines a validating HTML parser used in web application tests""" |
1 """defines a validating HTML parser used in web application tests""" |
2 |
2 |
3 import re |
3 import re |
4 from StringIO import StringIO |
|
5 |
4 |
6 from lxml import etree |
5 from lxml import etree |
7 from lxml.builder import E |
|
8 |
6 |
9 from cubicweb.view import STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE, CW_XHTML_EXTENSIONS |
7 from cubicweb.view import STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE, CW_XHTML_EXTENSIONS |
10 |
8 |
# Specialise the doctype templates imported from cubicweb.view: interpolate
# CW_XHTML_EXTENSIONS into each template with %, coerce the result to str and
# strip surrounding whitespace. These assignments rebind the imported names at
# module level, so later code in this module sees the expanded doctypes.
11 STRICT_DOCTYPE = str(STRICT_DOCTYPE % CW_XHTML_EXTENSIONS).strip() |
9 STRICT_DOCTYPE = str(STRICT_DOCTYPE % CW_XHTML_EXTENSIONS).strip() |
12 TRANSITIONAL_DOCTYPE = str(TRANSITIONAL_DOCTYPE % CW_XHTML_EXTENSIONS).strip() |
10 TRANSITIONAL_DOCTYPE = str(TRANSITIONAL_DOCTYPE % CW_XHTML_EXTENSIONS).strip() |
53 blockquotes = tree.findall('.//blockquote') |
51 blockquotes = tree.findall('.//blockquote') |
54 # quick and dirty approach: remove all blockquotes |
52 # quick and dirty approach: remove all blockquotes |
55 for blockquote in blockquotes: |
53 for blockquote in blockquotes: |
56 parent = blockquote.getparent() |
54 parent = blockquote.getparent() |
57 parent.remove(blockquote) |
55 parent.remove(blockquote) |
58 ## # for each blockquote, wrap unauthorized child in a div |
|
59 ## for blockquote in blockquotes: |
|
60 ## if len(blockquote): |
|
61 ## needs_wrap = [(index, child) for index, child in enumerate(blockquote) |
|
62 ## if child.tag not in expected] |
|
63 ## for index, child in needs_wrap: |
|
64 ## # the child is automatically popped from blockquote when |
|
65 ## # its parent is changed |
|
66 ## div = E.div(child) |
|
67 ## blockquote.insert(index, div) |
|
68 ## elif blockquote.text: |
|
69 ## div = E.div(blockquote.text) |
|
70 ## blockquote.text = None |
|
71 ## blockquote.append(div) |
|
72 data = etree.tostring(tree) |
56 data = etree.tostring(tree) |
73 return '<?xml version="1.0" encoding="UTF-8"?>%s\n%s' % (STRICT_DOCTYPE, data) |
57 return '<?xml version="1.0" encoding="UTF-8"?>%s\n%s' % (STRICT_DOCTYPE, data) |
74 |
58 |
75 |
59 |
76 class SaxOnlyValidator(Validator): |
60 class SaxOnlyValidator(Validator): |