normalize line breaks (actually fix https://www.logilab.net/cwo/ticket/343754) stable
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Tue, 30 Jun 2009 16:42:36 +0200
branch stable
changeset 2208 1c73148ed912
parent 2207 5fad58e603f4
child 2213 713e220e7425
normalize line breaks (actually fix https://www.logilab.net/cwo/ticket/343754)
common/uilib.py
test/unittest_entity.py
--- a/common/uilib.py	Tue Jun 30 15:25:46 2009 +0200
+++ b/common/uilib.py	Tue Jun 30 16:42:36 2009 +0200
@@ -92,7 +92,9 @@
 
 # fallback implementation, nicer one defined below if lxml is available
 def soup2xhtml(data, encoding):
-    return data
+    # normalize line break
+    # see http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1
+    return u'\n'.join(data.splitlines())
 
 # fallback implementation, nicer one defined below if lxml> 2.0 is available
 def safe_cut(text, length):
@@ -123,6 +125,10 @@
         Note: the function considers a string with no surrounding tag as valid
               if <div>`data`</div> can be parsed by an XML parser
         """
+        # normalize line break
+        # see http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1
+        data = u'\n'.join(data.splitlines())
+        # XXX lxml 1.1 support still needed ?
         xmltree = etree.HTML('<div>%s</div>' % data)
         # NOTE: lxml 1.1 (etch platforms) doesn't recognize
         #       the encoding=unicode parameter (lxml 2.0 does), this is
--- a/test/unittest_entity.py	Tue Jun 30 15:25:46 2009 +0200
+++ b/test/unittest_entity.py	Tue Jun 30 16:42:36 2009 +0200
@@ -296,6 +296,8 @@
         self.assertEquals(e.printable_value('content'), e['content'])
         e['content'] = u'été'
         self.assertEquals(e.printable_value('content'), e['content'])
+        e['content'] = u'hop\r\nhop\nhip\rmomo'
+        self.assertEquals(e.printable_value('content'), u'hop\nhop\nhip\nmomo')
 
 
     def test_fulltextindex(self):