cubicweb: comparison uilib.py

equal deleted inserted replaced

-:0b1f5c14646e
+:51ddb4842c56
 """tidy html soup by allowing some element tags and return the result
 """
 # remove spurious </body> and </html> tags, then normalize line breaks
 # (see http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1)
 data = REM_ROOT_HTML_TAGS.sub('', u'\n'.join(data.splitlines()))
-# XXX lxml 1.1 support still needed ?
 xmltree = etree.HTML(CLEANER.clean_html('<div>%s</div>' % data))
-# NOTE: lxml 1.1 (etch platforms) doesn't recognize
+# NOTE: lxml 2.0 does support encoding='unicode', but last time I (syt)
-#       the encoding=unicode parameter (lxml 2.0 does), this is
+# tried I got weird results (lxml 2.2.8)
-#       why we specify an encoding and re-decode to unicode later
 body = etree.tostring(xmltree[0], encoding=encoding)
 # remove <body> and </body> and decode to unicode
 snippet = body[6:-7].decode(encoding)
 # take care of bad xhtml (for instance starting with </div>) which
 # may mess with the <div> we added above. Only remove it if it's