devtools/htmlparser.py
branch tls-sprint
changeset 1485 4d532f3c012e
parent 1480 d3e3d527daf5
child 1945 2b59d9ae17ae
--- a/devtools/htmlparser.py	Fri Apr 24 19:46:47 2009 +0200
+++ b/devtools/htmlparser.py	Fri Apr 24 19:49:39 2009 +0200
@@ -5,11 +5,13 @@
 from lxml import etree
 
 from cubicweb.view import STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE
+STRICT_DOCTYPE = str(STRICT_DOCTYPE)
+TRANSITIONAL_DOCTYPE = str(TRANSITIONAL_DOCTYPE)
 
 ERR_COUNT = 0
 
 class Validator(object):
-    
+
     def parse_string(self, data, sysid=None):
         try:
             data = self.preprocess_data(data)
@@ -32,10 +34,10 @@
 
     def preprocess_data(self, data):
         """used to fix potential blockquote mess generated by docutils"""
-        if str(STRICT_DOCTYPE) not in data:
+        if STRICT_DOCTYPE not in data:
             return data
         # parse using transitional DTD
-        data = data.replace(str(STRICT_DOCTYPE), str(TRANSITIONAL_DOCTYPE))
+        data = data.replace(STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE)
         tree = etree.fromstring(data, self.parser)
         namespace = tree.nsmap.get(None)
         # this is the list of authorized child tags for <blockquote> nodes
@@ -51,9 +53,10 @@
             parent = blockquote.getparent()
             parent.remove(blockquote)
         data = etree.tostring(tree)
-        return '<?xml version="1.0" encoding="UTF-8"?>%s\n%s' % (str(STRICT_DOCTYPE), data)
+        return '<?xml version="1.0" encoding="UTF-8"?>%s\n%s' % (
+            STRICT_DOCTYPE, data)
 
-   
+
 class SaxOnlyValidator(Validator):
 
     def __init__(self):
@@ -66,7 +69,7 @@
         Validator.__init__(self)
         self.parser = etree.HTMLParser()
 
-    
+
 
 class PageInfo(object):
     """holds various informations on the view's output"""
@@ -84,7 +87,7 @@
         self.h4_tags = self.find_tag('h4')
         self.input_tags = self.find_tag('input')
         self.title_tags = [self.h1_tags, self.h2_tags, self.h3_tags, self.h4_tags]
-        
+
     def find_tag(self, tag):
         """return a list which contains text of all "tag" elements """
         if self.default_ns is None:
@@ -94,14 +97,14 @@
         if tag in ('a', 'input'):
             return [(elt.text, elt.attrib) for elt in self.etree.iterfind(iterstr)]
         return [u''.join(elt.xpath('.//text()')) for elt in self.etree.iterfind(iterstr)]
-         
+
     def appears(self, text):
         """returns True if <text> appears in the page"""
         return text in self.raw_text
 
     def __contains__(self, text):
         return text in self.source
-    
+
     def has_title(self, text, level=None):
         """returns True if <h?>text</h?>
 
@@ -131,7 +134,7 @@
                 if sre.match(title):
                     return True
             return False
-    
+
     def has_link(self, text, url=None):
         """returns True if <a href=url>text</a> was found in the page"""
         for link_text, attrs in self.a_tags:
@@ -145,7 +148,7 @@
                 except KeyError:
                     continue
         return False
-    
+
     def has_link_regexp(self, pattern, url=None):
         """returns True if <a href=url>pattern</a> was found in the page"""
         sre = re.compile(pattern)