# HG changeset patch # User Sylvain Thénault # Date 1246372956 -7200 # Node ID 1c73148ed9128f94dbcf374792ef5a264ce987e0 # Parent 5fad58e603f4f673d444c607df017d32393948c5 normalize line breaks (actually fix https://www.logilab.net/cwo/ticket/343754) diff -r 5fad58e603f4 -r 1c73148ed912 common/uilib.py --- a/common/uilib.py Tue Jun 30 15:25:46 2009 +0200 +++ b/common/uilib.py Tue Jun 30 16:42:36 2009 +0200 @@ -92,7 +92,9 @@ # fallback implementation, nicer one defined below if lxml is available def soup2xhtml(data, encoding): - return data + # normalize line break + # see http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1 + return u'\n'.join(data.splitlines()) # fallback implementation, nicer one defined below if lxml> 2.0 is available def safe_cut(text, length): @@ -123,6 +125,10 @@ Note: the function considers a string with no surrounding tag as valid if
<div>`data`</div>
can be parsed by an XML parser """ + # normalize line break + # see http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1 + data = u'\n'.join(data.splitlines()) + # XXX lxml 1.1 support still needed ? xmltree = etree.HTML('
<div>%s</div>
' % data) # NOTE: lxml 1.1 (etch platforms) doesn't recognize # the encoding=unicode parameter (lxml 2.0 does), this is diff -r 5fad58e603f4 -r 1c73148ed912 test/unittest_entity.py --- a/test/unittest_entity.py Tue Jun 30 15:25:46 2009 +0200 +++ b/test/unittest_entity.py Tue Jun 30 16:42:36 2009 +0200 @@ -296,6 +296,8 @@ self.assertEquals(e.printable_value('content'), e['content']) e['content'] = u'été' self.assertEquals(e.printable_value('content'), e['content']) + e['content'] = u'hop\r\nhop\nhip\rmomo' + self.assertEquals(e.printable_value('content'), u'hop\nhop\nhip\nmomo') def test_fulltextindex(self):