use mtconverter's html_unescape rather than saxutils' unescape to deal with any html entity
author Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
Thu, 08 Jan 2009 09:48:44 +0100
changeset 362 a6a319f000c3
parent 359 164307023401
child 363 9da036d0cced
use mtconverter's html_unescape rather than saxutils' unescape to deal with any html entity
common/test/unittest_uilib.py
common/uilib.py
--- a/common/test/unittest_uilib.py	Wed Jan 07 11:36:16 2009 -0800
+++ b/common/test/unittest_uilib.py	Thu Jan 08 09:48:44 2009 +0100
@@ -27,6 +27,7 @@
         self.assertEquals(uilib.fallback_safe_cut(u'ab <a href="hello">cd</a>', 5), u'ab <a href="hello">cd</a>')
         self.assertEquals(uilib.fallback_safe_cut(u'ab <a href="hello">&amp;d</a>', 4), u'ab &amp;...')
         self.assertEquals(uilib.fallback_safe_cut(u'ab <a href="hello">&amp;d</a> ef', 5), u'ab &amp;d...')
+        self.assertEquals(uilib.fallback_safe_cut(u'ab <a href="hello">&igrave;d</a>', 4), u'ab ì...')
         self.assertEquals(uilib.fallback_safe_cut(u'&amp; <a href="hello">&amp;d</a> ef', 4), u'&amp; &amp;d...')
         
     def test_lxml_safe_cut(self):
--- a/common/uilib.py	Wed Jan 07 11:36:16 2009 -0800
+++ b/common/uilib.py	Thu Jan 08 09:48:44 2009 +0100
@@ -15,7 +15,6 @@
 import re
 from urllib import quote as urlquote
 from cStringIO import StringIO
-from xml.sax.saxutils import unescape
 from copy import deepcopy
 
 import simplejson
@@ -23,7 +22,7 @@
 from mx.DateTime import DateTimeType, DateTimeDeltaType
 
 from logilab.common.textutils import unormalize
-from logilab.mtconverter import html_escape
+from logilab.mtconverter import html_escape, html_unescape
 
 def ustrftime(date, fmt='%Y-%m-%d'):
     """like strftime, but returns a unicode string instead of an encoded
@@ -117,7 +116,7 @@
     tags from given text if cut is necessary."""
     if text is None:
         return u''
-    noenttext = unescape(text)
+    noenttext = html_unescape(text)
     text_nohtml = remove_html_tags(noenttext)
     # try to keep html tags if text is short enough
     if len(text_nohtml) <= length: