diff -r ebe40a8c7cc9 -r f34ef2c64605 common/uilib.py --- a/common/uilib.py Wed Jan 07 16:12:45 2009 +0100 +++ b/common/uilib.py Wed Jan 07 18:30:00 2009 +0100 @@ -15,7 +15,7 @@ import re from urllib import quote as urlquote from cStringIO import StringIO -from xml.parsers.expat import ExpatError +from xml.sax.saxutils import unescape from copy import deepcopy import simplejson @@ -23,6 +23,7 @@ from mx.DateTime import DateTimeType, DateTimeDeltaType from logilab.common.textutils import unormalize +from logilab.mtconverter import html_escape def ustrftime(date, fmt='%Y-%m-%d'): """like strftime, but returns a unicode string instead of an encoded @@ -116,12 +117,15 @@ tags from given text if cut is necessary.""" if text is None: return u'' - text_nohtml = remove_html_tags(text) + noenttext = unescape(text) + text_nohtml = remove_html_tags(noenttext) # try to keep html tags if text is short enough if len(text_nohtml) <= length: return text # else if un-tagged text is too long, cut it - return text_nohtml[:length-3] + u'...' + return html_escape(text_nohtml[:length] + u'...') + +fallback_safe_cut = safe_cut try: @@ -152,40 +156,64 @@ """ if text is None: return u'' - textParse = etree.HTML(text) - compteur = 0 - - for element in textParse.iter(): - if compteur > length: + dom = etree.HTML(text) + curlength = 0 + add_ellipsis = False + for element in dom.iter(): + if curlength >= length: parent = element.getparent() parent.remove(element) + if curlength == length and (element.text or element.tail): + add_ellipsis = True else: if element.text is not None: - text_resum = text_cut_letters(element.text,length) - len_text_resum = len(''.join(text_resum.split())) - compteur = compteur + len_text_resum - element.text = text_resum - + element.text = cut(element.text, length - curlength) + curlength += len(element.text) if element.tail is not None: - if compteur < length: - text_resum = text_cut_letters(element.tail,length) - len_text_resum = len(''.join(text_resum.split())) - compteur = compteur + len_text_resum - element.tail = text_resum + if curlength < length: + element.tail = cut(element.tail, length - curlength) + curlength += len(element.tail) + elif curlength == length: + element.tail = '...' else: element.tail = '' + text = etree.tounicode(dom[0])[6:-7] # remove wrapping
+ if add_ellipsis: + return text + u'...' + return text + +def text_cut(text, nbwords=30): + """from the given plain text, return a text with at least