#
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
"""user interface libraries

contains some functions designed to help implementation of cubicweb user
interface.
"""
|
24 |
24 __docformat__ = "restructuredtext en" |
25 __docformat__ = "restructuredtext en" |
25 |
26 |
26 import csv |
27 import csv |
27 import re |
28 import re |
28 from StringIO import StringIO |
29 from StringIO import StringIO |
121 # else if un-tagged text is too long, cut it |
122 # else if un-tagged text is too long, cut it |
122 return xml_escape(text_nohtml[:length] + u'...') |
123 return xml_escape(text_nohtml[:length] + u'...') |
123 |
124 |
124 fallback_safe_cut = safe_cut |
125 fallback_safe_cut = safe_cut |
125 |
126 |
126 |
127 REM_ROOT_HTML_TAGS = re.compile('</(body|html)>', re.U) |
127 try: |
128 try: |
128 from lxml import etree |
129 from lxml import etree |
129 except (ImportError, AttributeError): |
130 except (ImportError, AttributeError): |
130 # gae environment: lxml not available |
131 # gae environment: lxml not available |
131 pass |
132 pass |
132 else: |
133 else: |
133 |
134 |
134 def soup2xhtml(data, encoding): |
135 def soup2xhtml(data, encoding): |
135 """tidy (at least try) html soup and return the result |
136 """tidy (at least try) html soup and return the result |
|
137 |
136 Note: the function considers a string with no surrounding tag as valid |
138 Note: the function considers a string with no surrounding tag as valid |
137 if <div>`data`</div> can be parsed by an XML parser |
139 if <div>`data`</div> can be parsed by an XML parser |
138 """ |
140 """ |
139 # normalize line break |
141 # remove spurious </body> and </html> tags, then normalize line break |
140 # see http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1 |
142 # (see http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1) |
141 data = u'\n'.join(data.splitlines()) |
143 data = REM_ROOT_HTML_TAGS.sub('', u'\n'.join(data.splitlines())) |
142 # XXX lxml 1.1 support still needed ? |
144 # XXX lxml 1.1 support still needed ? |
143 xmltree = etree.HTML('<div>%s</div>' % data) |
145 xmltree = etree.HTML('<div>%s</div>' % data) |
144 # NOTE: lxml 1.1 (etch platforms) doesn't recognize |
146 # NOTE: lxml 1.1 (etch platforms) doesn't recognize |
145 # the encoding=unicode parameter (lxml 2.0 does), this is |
147 # the encoding=unicode parameter (lxml 2.0 does), this is |
146 # why we specify an encoding and re-decode to unicode later |
148 # why we specify an encoding and re-decode to unicode later |
147 body = etree.tostring(xmltree[0], encoding=encoding) |
149 body = etree.tostring(xmltree[0], encoding=encoding) |
148 # remove <body> and </body> and decode to unicode |
150 # remove <body> and </body> and decode to unicode |
149 return body[11:-13].decode(encoding) |
151 snippet = body[6:-7].decode(encoding) |
|
152 # take care to bad xhtml (for instance starting with </div>) which |
|
153 # may mess with the <div> we added below. Only remove it if it's |
|
154 # still there... |
|
155 if snippet.startswith('<div>') and snippet.endswith('</div>'): |
|
156 snippet = snippet[5:-6] |
|
157 return snippet |
150 |
158 |
151 if hasattr(etree.HTML('<div>test</div>'), 'iter'): |
159 if hasattr(etree.HTML('<div>test</div>'), 'iter'): |
152 |
160 |
153 def safe_cut(text, length): |
161 def safe_cut(text, length): |
154 """returns an html document of length <length> based on <text>, |
162 """returns an html document of length <length> based on <text>, |