Use mtconverter's html_unescape rather than saxutils' unescape to deal with any HTML entity
--- a/common/test/unittest_uilib.py Wed Jan 07 11:36:16 2009 -0800
+++ b/common/test/unittest_uilib.py Thu Jan 08 09:48:44 2009 +0100
@@ -27,6 +27,7 @@
self.assertEquals(uilib.fallback_safe_cut(u'ab <a href="hello">cd</a>', 5), u'ab <a href="hello">cd</a>')
self.assertEquals(uilib.fallback_safe_cut(u'ab <a href="hello">&d</a>', 4), u'ab &...')
self.assertEquals(uilib.fallback_safe_cut(u'ab <a href="hello">&d</a> ef', 5), u'ab &d...')
+ self.assertEquals(uilib.fallback_safe_cut(u'ab <a href="hello">ìd</a>', 4), u'ab ì...')
self.assertEquals(uilib.fallback_safe_cut(u'& <a href="hello">&d</a> ef', 4), u'& &d...')
def test_lxml_safe_cut(self):
--- a/common/uilib.py Wed Jan 07 11:36:16 2009 -0800
+++ b/common/uilib.py Thu Jan 08 09:48:44 2009 +0100
@@ -15,7 +15,6 @@
import re
from urllib import quote as urlquote
from cStringIO import StringIO
-from xml.sax.saxutils import unescape
from copy import deepcopy
import simplejson
@@ -23,7 +22,7 @@
from mx.DateTime import DateTimeType, DateTimeDeltaType
from logilab.common.textutils import unormalize
-from logilab.mtconverter import html_escape
+from logilab.mtconverter import html_escape, html_unescape
def ustrftime(date, fmt='%Y-%m-%d'):
"""like strftime, but returns a unicode string instead of an encoded
@@ -117,7 +116,7 @@
tags from given text if cut is necessary."""
if text is None:
return u''
- noenttext = unescape(text)
+ noenttext = html_unescape(text)
text_nohtml = remove_html_tags(noenttext)
# try to keep html tags if text is short enough
if len(text_nohtml) <= length: