author | Julien Cristau <julien.cristau@logilab.fr> |
Wed, 09 Jul 2014 15:40:50 +0200 | |
changeset 9905 | 1fa35cc06c69 |
parent 8979 | 8f5416b1562a |
child 10006 | 8391bf718485 |
permissions | -rw-r--r-- |
8937
8a1809c9a043
[htmlparser] add missing deprecation message
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8695
diff
changeset
|
1 |
# copyright 2003-2013 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
5421
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5276
diff
changeset
|
2 |
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5276
diff
changeset
|
3 |
# |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5276
diff
changeset
|
4 |
# This file is part of CubicWeb. |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5276
diff
changeset
|
5 |
# |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5276
diff
changeset
|
6 |
# CubicWeb is free software: you can redistribute it and/or modify it under the |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5276
diff
changeset
|
7 |
# terms of the GNU Lesser General Public License as published by the Free |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5276
diff
changeset
|
8 |
# Software Foundation, either version 2.1 of the License, or (at your option) |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5276
diff
changeset
|
9 |
# any later version. |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5276
diff
changeset
|
10 |
# |
5424
8ecbcbff9777
replace logilab-common by CubicWeb in disclaimer
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5421
diff
changeset
|
11 |
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT |
5421
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5276
diff
changeset
|
12 |
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5276
diff
changeset
|
13 |
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5276
diff
changeset
|
14 |
# details. |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5276
diff
changeset
|
15 |
# |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5276
diff
changeset
|
16 |
# You should have received a copy of the GNU Lesser General Public License along |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5276
diff
changeset
|
17 |
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
6771
da71f1ad1721
minor code cleanup
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5424
diff
changeset
|
18 |
"""defines a validating HTML parser used in web application tests""" |
0 | 19 |
|
20 |
import re |
|
3325
44caeccd2db9
fix sys import
Julien Jehannet <julien.jehannet@logilab.fr>
parents:
3151
diff
changeset
|
21 |
import sys |
8977
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
22 |
from xml import sax |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
23 |
from cStringIO import StringIO |
0 | 24 |
|
25 |
from lxml import etree |
|
26 |
||
8938
198fdadafed6
[htmlparser] rename SaxOnlyValidator to XMLValidator
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8937
diff
changeset
|
27 |
from logilab.common.deprecation import class_deprecated, class_renamed |
6772
68bb0943d192
[test, html validation] make validator selection somewhat smarter (at least it works properly when content is demoted from xhtml to html, making the XMLDemotingValidator class useless
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
6771
diff
changeset
|
28 |
|
1421
77ee26df178f
doc type handling refactoring: do the ext substitution at the module level
sylvain.thenault@logilab.fr
parents:
1132
diff
changeset
|
29 |
from cubicweb.view import STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE |
6771
da71f1ad1721
minor code cleanup
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5424
diff
changeset
|
30 |
|
1485 | 31 |
# pass both doctype constants imported from cubicweb.view through str()
# NOTE(review): presumably so they can be searched for / substituted in
# plain-string page sources -- confirm against cubicweb.view
STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE = (str(STRICT_DOCTYPE),
                                        str(TRANSITIONAL_DOCTYPE))

# NOTE(review): not referenced in the visible part of this module
ERR_COUNT = 0
|
35 |
||
8977
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
36 |
_REM_SCRIPT_RGX = re.compile(r"<script[^>]*>.*?</script>", re.U|re.M|re.I|re.S) |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
37 |
def _remove_script_tags(data): |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
38 |
"""Remove the script (usually javascript) tags to help the lxml |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
39 |
XMLParser / HTMLParser do their job. Without that, they choke on |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
40 |
tags embedded in JS strings. |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
41 |
""" |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
42 |
# Notice we may want to use lxml cleaner, but it's far too intrusive: |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
43 |
# |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
44 |
# cleaner = Cleaner(scripts=True, |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
45 |
# javascript=False, |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
46 |
# comments=False, |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
47 |
# style=False, |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
48 |
# links=False, |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
49 |
# meta=False, |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
50 |
# page_structure=False, |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
51 |
# processing_instructions=False, |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
52 |
# embedded=False, |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
53 |
# frames=False, |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
54 |
# forms=False, |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
55 |
# annoying_tags=False, |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
56 |
# remove_tags=(), |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
57 |
# remove_unknown_tags=False, |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
58 |
# safe_attrs_only=False, |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
59 |
# add_nofollow=False) |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
60 |
# >>> cleaner.clean_html('<body></body>') |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
61 |
# '<span></span>' |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
62 |
# >>> cleaner.clean_html('<!DOCTYPE html><body></body>') |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
63 |
# '<html><body></body></html>' |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
64 |
# >>> cleaner.clean_html('<body><div/></body>') |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
65 |
# '<div></div>' |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
66 |
# >>> cleaner.clean_html('<html><body><div/><br></body><html>') |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
67 |
# '<html><body><div></div><br></body></html>' |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
68 |
# >>> cleaner.clean_html('<html><body><div/><br><span></body><html>') |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
69 |
# '<html><body><div></div><br><span></span></body></html>' |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
70 |
# |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
71 |
# using that, we'll miss most actual validation error we want to |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
72 |
# catch. For now, use dumb regexp |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
73 |
return _REM_SCRIPT_RGX.sub('', data) |
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
74 |
|
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
75 |
|
0 | 76 |
class Validator(object):
    """base validator API

    Subclasses are expected to set `parser` (the lxml parser given to
    `etree.fromstring`) and may override `preprocess_data` to massage the
    page source before it is parsed.
    """
    # lxml parser instance used by `_parse`; None until a subclass sets one
    parser = None

    def parse_string(self, source):
        """Parse `source` and return a `PageInfo` describing it.

        The tree is built from the preprocessed data, while the returned
        `PageInfo` is given the original, unprocessed `source`.

        Raises AssertionError (from `_parse`) when the document is invalid.
        """
        # named `tree` so the lxml `etree` module is not shadowed locally
        tree = self._parse(self.preprocess_data(source))
        return PageInfo(source, tree)

    def preprocess_data(self, data):
        """Hook called on the page source before parsing.

        The base implementation returns `data` unchanged.
        """
        return data

    def _parse(self, pdata):
        """Parse the preprocessed data `pdata` using `self.parser`.

        Returns what `etree.fromstring` returns. An lxml `XMLSyntaxError`
        is converted into an AssertionError whose message embeds the
        original error and whose `position` attribute is copied from it.
        """
        try:
            return etree.fromstring(pdata, self.parser)
        except etree.XMLSyntaxError as exc:
            def save_in(fname=''):
                # Helper never called by this code; presumably meant to be
                # invoked by hand from a debugger to dump the offending
                # document. It must read `pdata`: there is no `data` name
                # in this scope.
                with open(fname, 'w') as stream:
                    stream.write(pdata)
            new_exc = AssertionError(u'invalid document: %s' % exc)
            new_exc.position = exc.position
            raise new_exc
|
96 |
||
97 |
||
98 |
class DTDValidator(Validator):
    """validator parsing documents with a DTD-validating lxml XML parser"""

    def __init__(self):
        super(DTDValidator, self).__init__()
        # XXX understand what's happening under windows
        # (DTD validation is switched off when running on win32)
        self.parser = etree.XMLParser(dtd_validation=sys.platform != 'win32')

    def preprocess_data(self, data):
        """used to fix potential blockquote mess generated by docutils

        Data that does not contain the strict doctype is returned untouched.
        Otherwise the document is parsed with the transitional doctype
        substituted for the strict one, every <blockquote> element is
        removed from the tree, and the result is serialized again under
        the strict doctype with an XML declaration prepended.

        Raises AssertionError (from `_parse`) if the substituted document
        cannot be parsed.
        """
        if STRICT_DOCTYPE not in data:
            return data
        # parse using transitional DTD
        data = data.replace(STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE)
        tree = self._parse(data)
        # elements live in the default namespace when the document has one
        namespace = tree.nsmap.get(None)
        if namespace:
            path = './/{%s}blockquote' % namespace
        else:
            path = './/blockquote'
        # quick and dirty approach: remove all blockquotes, whatever their
        # children are (findall returns a list, so removing while looping
        # is safe)
        for blockquote in tree.findall(path):
            blockquote.getparent().remove(blockquote)
        data = etree.tostring(tree)
        return '<?xml version="1.0" encoding="UTF-8"?>%s\n%s' % (
            STRICT_DOCTYPE, data)
|
0 | 127 |
|
1485 | 128 |
|
8938
198fdadafed6
[htmlparser] rename SaxOnlyValidator to XMLValidator
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8937
diff
changeset
|
129 |
class XMLValidator(Validator):
    """XML validator relying on a default lxml XMLParser: checks that the
    XML is well-formed and that the XML namespaces it uses are defined
    """

    def __init__(self):
        super(XMLValidator, self).__init__()
        self.parser = etree.XMLParser()


# former name of XMLValidator, kept as a deprecated alias
SaxOnlyValidator = class_renamed(
    'SaxOnlyValidator', XMLValidator,
    '[3.17] you should use the XMLValidator class instead')
198fdadafed6
[htmlparser] rename SaxOnlyValidator to XMLValidator
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8937
diff
changeset
|
140 |
|
8977
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
141 |
|
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
142 |
class XMLSyntaxValidator(Validator):
    """XML syntax validator, check XML is well-formed"""

    class MySaxErrorHandler(sax.ErrorHandler):
        """override default handler to avoid choking because of unknown entity"""
        def fatalError(self, exception):
            # XXX check entity in htmlentitydefs
            # every fatal error is re-raised, except those whose message
            # ends with 'undefined entity'
            if not str(exception).endswith('undefined entity'):
                raise exception

    # SAX parser created once at class definition time and shared by all
    # instances; it is only used by `_parse` to check well-formedness
    _parser = sax.make_parser()
    _parser.setContentHandler(sax.handler.ContentHandler())
    _parser.setErrorHandler(MySaxErrorHandler())

    def __init__(self):
        super(XMLSyntaxValidator, self).__init__()
        # XMLParser() wants xml namespaces defined
        # XMLParser(recover=True) will accept almost anything
        #
        # -> use the latter, but `_parse` first checks xml well-formedness
        # using a dumb SAX parser
        self.parser = etree.XMLParser(recover=True)

    def preprocess_data(self, data):
        """Return `data` without its <script> elements."""
        return _remove_script_tags(data)

    def _parse(self, data):
        """Check `data` with the SAX parser, then parse it with lxml.

        Returns the result of the base class `_parse`.

        Raises AssertionError when the SAX parser reports a parse error;
        the exception's `position` attribute is the (line, column) pair
        given by the SAX exception.
        """
        inpsrc = sax.InputSource()
        inpsrc.setByteStream(StringIO(data))
        try:
            self._parser.parse(inpsrc)
        except sax.SAXParseException as exc:
            new_exc = AssertionError(u'invalid document: %s' % exc)
            # public accessors rather than the private _linenum / _colnum
            new_exc.position = (exc.getLineNumber(), exc.getColumnNumber())
            raise new_exc
        return super(XMLSyntaxValidator, self)._parse(data)
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
177 |
|
57e564c0118e
[testlib] introduce a validator that check xml-well formness
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8973
diff
changeset
|
178 |
|
8951
5cfc0387d63f
[devtools] Use XMLValidator instead deprecated SaxOnlyValidator
Pierre-Yves David <pierre-yves.david@logilab.fr>
parents:
8941
diff
changeset
|
179 |
class XMLDemotingValidator(XMLValidator):
    """validator for views that output html rather than xhtml because they
    use demote_to_html

    This typically happens with external dependencies which do not
    produce valid xhtml (google maps, ...).
    """
    __metaclass__ = class_deprecated
    __deprecation_warning__ = '[3.10] this is now handled in testlib.py'

    def preprocess_data(self, data):
        """Choose the parser matching `data`, then return `data` unchanged.

        Data starting with an XML declaration gets a fresh XMLParser,
        anything else a fresh HTMLParser.
        """
        is_xml = data.startswith('<?xml')
        parser_class = etree.XMLParser if is_xml else etree.HTMLParser
        self.parser = parser_class()
        return data
5037d891e207
[devtools/validators] add an Xml validator able to degrade to Html validation because of views perusing demote_to_html
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents:
4252
diff
changeset
|
194 |
|
5037d891e207
[devtools/validators] add an Xml validator able to degrade to Html validation because of views perusing demote_to_html
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents:
4252
diff
changeset
|
195 |
|
0 | 196 |
class HTMLValidator(Validator):
    """validator parsing documents with an lxml HTMLParser created with
    recover=False
    """

    def __init__(self):
        super(HTMLValidator, self).__init__()
        self.parser = etree.HTMLParser(recover=False)

    def preprocess_data(self, data):
        """Return `data` without its <script> elements."""
        cleaned = _remove_script_tags(data)
        return cleaned
1485 | 204 |
|
0 | 205 |
|
206 |
class PageInfo(object):
    """holds various informations on the view's output

    `source` is the raw page source (used by the ``in`` operator) and `root`
    is the parsed tree root. The root has to provide `xpath()`, `nsmap` and
    `iterfind()` (presumably a lxml element -- confirm against callers).

    Text / attributes of the <a>, <h1>..<h4> and <input> tags are extracted
    once, at construction time.
    """

    def __init__(self, source, root):
        self.source = source
        self.etree = root
        self.raw_text = u''.join(root.xpath('//text()'))
        self.namespace = self.etree.nsmap
        # `default_ns` and `etree` must be set before any `find_tag()` call
        self.default_ns = self.namespace.get(None)
        self.a_tags = self.find_tag('a')
        self.h1_tags = self.find_tag('h1')
        self.h2_tags = self.find_tag('h2')
        self.h3_tags = self.find_tag('h3')
        self.h4_tags = self.find_tag('h4')
        self.input_tags = self.find_tag('input')
        # indexed by title level minus one (h1 is at index 0)
        self.title_tags = [self.h1_tags, self.h2_tags,
                           self.h3_tags, self.h4_tags]

    def _iterstr(self, tag):
        """return the `iterfind()` path matching every `tag` element of the
        tree, qualified by the default namespace if there is one
        """
        if self.default_ns is None:
            return ".//%s" % tag
        return ".//{%s}%s" % (self.default_ns, tag)

    def _title_lists(self, level):
        """return the lists of title texts to search: every level if `level`
        is None, else only the one for <h`level`>
        """
        if level is None:
            return self.title_tags
        return [self.title_tags[level - 1]]

    def matching_nodes(self, tag, **attrs):
        """yield every `tag` element having each attribute given in `attrs`
        set to the given value (all `tag` elements if no attribute is given)
        """
        for elt in self.etree.iterfind(self._iterstr(tag)):
            eltattrs = elt.attrib
            # .items() rather than .iteritems() so this runs on python 2 and 3
            if all(attr in eltattrs and eltattrs[attr] == value
                   for attr, value in attrs.items()):
                yield elt

    def has_tag(self, tag, nboccurs=1, **attrs):
        """returns True if tag with given attributes appears in the page
        exactly `nboccurs` times (at least once if `nboccurs` is None)
        """
        for elt in self.matching_nodes(tag, **attrs):
            if nboccurs is None: # no need to check number of occurences
                return True
            if not nboccurs: # too much occurences
                return False
            nboccurs -= 1
        if nboccurs == 0: # correct number of occurences
            return True
        return False # no matching tag/attrs, or not enough of them

    def find_tag(self, tag, gettext=True):
        """return a list with one item per `tag` element

        Items are the whole text content of the element (descendants
        included), unless `gettext` is false or `tag` is 'a' or 'input': in
        such case items are (text, attributes) tuples, where text is the
        element's own leading text (None if it has none).
        """
        iterstr = self._iterstr(tag)
        if not gettext or tag in ('a', 'input'):
            return [(elt.text, elt.attrib)
                    for elt in self.etree.iterfind(iterstr)]
        return [u''.join(elt.xpath('.//text()'))
                for elt in self.etree.iterfind(iterstr)]

    def appears(self, text):
        """returns True if <text> appears in the page"""
        return text in self.raw_text

    def __contains__(self, text):
        """returns True if <text> appears in the page's source"""
        return text in self.source

    def has_title(self, text, level=None):
        """returns True if <h?>text</h?>

        :param level: the title's level (1 for h1, 2 for h2, etc.), every
          level (h1 to h4) being searched if None
        """
        return any(text in hlist for hlist in self._title_lists(level))

    def has_title_regexp(self, pattern, level=None):
        """returns True if <h?>pattern</h?>

        `pattern` is a regular expression matched from the beginning of the
        title's text.

        :param level: the title's level (1 for h1, 2 for h2, etc.), every
          level (h1 to h4) being searched if None
        """
        sre = re.compile(pattern)
        return any(sre.match(title)
                   for hlist in self._title_lists(level)
                   for title in hlist)

    def has_link(self, text, url=None):
        """returns True if <a href=url>text</a> was found in the page

        The href isn't checked if `url` is None.
        """
        for link_text, attrs in self.a_tags:
            if text == link_text:
                if url is None or attrs.get('href') == url:
                    return True
        return False

    def has_link_regexp(self, pattern, url=None):
        """returns True if <a href=url>pattern</a> was found in the page

        `pattern` is a regular expression matched from the beginning of the
        link's text. The href isn't checked if `url` is None.
        """
        sre = re.compile(pattern)
        for link_text, attrs in self.a_tags:
            # text is None for a link without leading text (e.g. wrapping an
            # image only): handle it as an empty text instead of crashing
            if sre.match(link_text or u''):
                if url is None or attrs.get('href') == url:
                    return True
        return False
|
2773
b2530e3e0afb
[testlib] #345052 and #344207: major test lib refactoring/cleanup + update usage
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
1977
diff
changeset
|
328 |
|
8972
771337c3a754
[testlib] update htmlparsers.VALMAP: stop using SaxOnlyValidator and add an entry for html
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
8951
diff
changeset
|
329 |
# map a validation mode name to the validator class implementing it (None
# maps to None, i.e. no validator class)
VALMAP = {None: None,
          'dtd': DTDValidator,
          'xml': XMLValidator,
          'html': HTMLValidator,
          }