--- a/devtools/htmlparser.py Fri Apr 24 19:46:47 2009 +0200
+++ b/devtools/htmlparser.py Fri Apr 24 19:49:39 2009 +0200
@@ -5,11 +5,13 @@
from lxml import etree
from cubicweb.view import STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE
+STRICT_DOCTYPE = str(STRICT_DOCTYPE)
+TRANSITIONAL_DOCTYPE = str(TRANSITIONAL_DOCTYPE)
ERR_COUNT = 0
class Validator(object):
-
+
def parse_string(self, data, sysid=None):
try:
data = self.preprocess_data(data)
@@ -32,10 +34,10 @@
def preprocess_data(self, data):
"""used to fix potential blockquote mess generated by docutils"""
- if str(STRICT_DOCTYPE) not in data:
+ if STRICT_DOCTYPE not in data:
return data
# parse using transitional DTD
- data = data.replace(str(STRICT_DOCTYPE), str(TRANSITIONAL_DOCTYPE))
+ data = data.replace(STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE)
tree = etree.fromstring(data, self.parser)
namespace = tree.nsmap.get(None)
# this is the list of authorized child tags for <blockquote> nodes
@@ -51,9 +53,10 @@
parent = blockquote.getparent()
parent.remove(blockquote)
data = etree.tostring(tree)
- return '<?xml version="1.0" encoding="UTF-8"?>%s\n%s' % (str(STRICT_DOCTYPE), data)
+ return '<?xml version="1.0" encoding="UTF-8"?>%s\n%s' % (
+ STRICT_DOCTYPE, data)
-
+
class SaxOnlyValidator(Validator):
def __init__(self):
@@ -66,7 +69,7 @@
Validator.__init__(self)
self.parser = etree.HTMLParser()
-
+
class PageInfo(object):
"""holds various informations on the view's output"""
@@ -84,7 +87,7 @@
self.h4_tags = self.find_tag('h4')
self.input_tags = self.find_tag('input')
self.title_tags = [self.h1_tags, self.h2_tags, self.h3_tags, self.h4_tags]
-
+
def find_tag(self, tag):
"""return a list which contains text of all "tag" elements """
if self.default_ns is None:
@@ -94,14 +97,14 @@
if tag in ('a', 'input'):
return [(elt.text, elt.attrib) for elt in self.etree.iterfind(iterstr)]
return [u''.join(elt.xpath('.//text()')) for elt in self.etree.iterfind(iterstr)]
-
+
def appears(self, text):
"""returns True if <text> appears in the page"""
return text in self.raw_text
def __contains__(self, text):
return text in self.source
-
+
def has_title(self, text, level=None):
"""returns True if <h?>text</h?>
@@ -131,7 +134,7 @@
if sre.match(title):
return True
return False
-
+
def has_link(self, text, url=None):
"""returns True if <a href=url>text</a> was found in the page"""
for link_text, attrs in self.a_tags:
@@ -145,7 +148,7 @@
except KeyError:
continue
return False
-
+
def has_link_regexp(self, pattern, url=None):
"""returns True if <a href=url>pattern</a> was found in the page"""
sre = re.compile(pattern)