diff -r 183da3addf0e -r 4d532f3c012e devtools/htmlparser.py --- a/devtools/htmlparser.py Fri Apr 24 19:46:47 2009 +0200 +++ b/devtools/htmlparser.py Fri Apr 24 19:49:39 2009 +0200 @@ -5,11 +5,13 @@ from lxml import etree from cubicweb.view import STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE +STRICT_DOCTYPE = str(STRICT_DOCTYPE) +TRANSITIONAL_DOCTYPE = str(TRANSITIONAL_DOCTYPE) ERR_COUNT = 0 class Validator(object): - + def parse_string(self, data, sysid=None): try: data = self.preprocess_data(data) @@ -32,10 +34,10 @@ def preprocess_data(self, data): """used to fix potential blockquote mess generated by docutils""" - if str(STRICT_DOCTYPE) not in data: + if STRICT_DOCTYPE not in data: return data # parse using transitional DTD - data = data.replace(str(STRICT_DOCTYPE), str(TRANSITIONAL_DOCTYPE)) + data = data.replace(STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE) tree = etree.fromstring(data, self.parser) namespace = tree.nsmap.get(None) # this is the list of authorized child tags for
blockquote nodes @@ -51,9 +53,10 @@ parent = blockquote.getparent() parent.remove(blockquote) data = etree.tostring(tree) - return '%s\n%s' % (str(STRICT_DOCTYPE), data) + return '%s\n%s' % ( + STRICT_DOCTYPE, data) - + class SaxOnlyValidator(Validator): def __init__(self): @@ -66,7 +69,7 @@ Validator.__init__(self) self.parser = etree.HTMLParser() - + class PageInfo(object): """holds various informations on the view's output""" @@ -84,7 +87,7 @@ self.h4_tags = self.find_tag('h4') self.input_tags = self.find_tag('input') self.title_tags = [self.h1_tags, self.h2_tags, self.h3_tags, self.h4_tags] - + def find_tag(self, tag): """return a list which contains text of all "tag" elements """ if self.default_ns is None: @@ -94,14 +97,14 @@ if tag in ('a', 'input'): return [(elt.text, elt.attrib) for elt in self.etree.iterfind(iterstr)] return [u''.join(elt.xpath('.//text()')) for elt in self.etree.iterfind(iterstr)] - + def appears(self, text): """returns True if text appears in the page""" return text in self.raw_text def __contains__(self, text): return text in self.source - + def has_title(self, text, level=None): """returns True if text @@ -131,7 +134,7 @@ if sre.match(title): return True return False - + def has_link(self, text, url=None): """returns True if text was found in the page""" for link_text, attrs in self.a_tags: @@ -145,7 +148,7 @@ except KeyError: continue return False - + def has_link_regexp(self, pattern, url=None): """returns True if pattern was found in the page""" sre = re.compile(pattern)