', 'ascii'), + 'hop

') + self.assertEquals(uilib.soup2xhtml('

hop', 'ascii'), + '

hop

') + self.assertEquals(uilib.soup2xhtml('hop

hop', 'ascii'), + 'hop

hop

') + + def test_soup2xhtml_1_2(self): + self.assertEquals(uilib.soup2xhtml('hop

', 'ascii'), + 'hop ') + self.assertEquals(uilib.soup2xhtml('

hop', 'ascii'), + '

hop') + self.assertEquals(uilib.soup2xhtml('hop

hop', 'ascii'), + '

hop

hop') + + def test_soup2xhtml_2_1(self): + self.assertEquals(uilib.soup2xhtml('hop ', 'ascii'), + 'hop ') + self.assertEquals(uilib.soup2xhtml(' hop', 'ascii'), + ' hop') + self.assertEquals(uilib.soup2xhtml('hop hop', 'ascii'), + 'hop hop') + + def test_soup2xhtml_2_2(self): + self.assertEquals(uilib.soup2xhtml('hop ', 'ascii'), + 'hop ') + self.assertEquals(uilib.soup2xhtml(' hop', 'ascii'), + ' hop') + self.assertEquals(uilib.soup2xhtml('hop hop', 'ascii'), + 'hop hop') + + def test_soup2xhtml_3_1(self): + self.assertEquals(uilib.soup2xhtml('hop ', 'ascii'), + 'hop ') + self.assertEquals(uilib.soup2xhtml(' hop', 'ascii'), + ' hop') + self.assertEquals(uilib.soup2xhtml('hop hop', 'ascii'), + 'hop hop') + + def test_soup2xhtml_3_2(self): + self.assertEquals(uilib.soup2xhtml('hop ', 'ascii'), + 'hop ') + self.assertEquals(uilib.soup2xhtml(' hop', 'ascii'), + ' hop') + self.assertEquals(uilib.soup2xhtml('hop hop', 'ascii'), + 'hop hop') + if __name__ == '__main__': unittest_main() diff -r b00cf7fbff31 -r 784025c15a3c uilib.py --- a/uilib.py Fri Jun 11 09:20:38 2010 +0200 +++ b/uilib.py Fri Jun 11 11:36:01 2010 +0200 @@ -18,9 +18,10 @@ # with CubicWeb. If not, see . """user interface libraries -contains some functions designed to help implementation of cubicweb user interface +contains some functions designed to help implementation of cubicweb user +interface. +""" -""" __docformat__ = "restructuredtext en" import csv @@ -123,7 +124,7 @@ fallback_safe_cut = safe_cut - +REM_ROOT_HTML_TAGS = re.compile('', re.U) try: from lxml import etree except (ImportError, AttributeError): @@ -133,12 +134,13 @@ def soup2xhtml(data, encoding): """tidy (at least try) html soup and return the result + Note: the function considers a string with no surrounding tag as valid if

`data`

can be parsed by an XML parser """ - # normalize line break - # see http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1 - data = u'\n'.join(data.splitlines()) + # remove spurious and tags, then normalize line break + # (see http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1) + data = REM_ROOT_HTML_TAGS.sub('', u'\n'.join(data.splitlines())) # XXX lxml 1.1 support still needed ? xmltree = etree.HTML('

' % data) # NOTE: lxml 1.1 (etch platforms) doesn't recognize @@ -146,7 +148,13 @@ # why we specify an encoding and re-decode to unicode later body = etree.tostring(xmltree[0], encoding=encoding) # remove and and decode to unicode - return body[11:-13].decode(encoding) + snippet = body[6:-7].decode(encoding) + # take care to bad xhtml (for instance starting with

) which + # may mess with the

we added below. Only remove it if it's + # still there... + if snippet.startswith('

') and snippet.endswith('

'): + snippet = snippet[5:-6] + return snippet if hasattr(etree.HTML('

test

'), 'iter'):