# HG changeset patch # User Laure Bourgois # Date 1228214404 -3600 # Node ID c5ff97312b8a11ba7a677d669729532bd31ea32d # Parent 031b97433e84fce1ba18e956f1dce88aea49adfe cleaning code diff -r 031b97433e84 -r c5ff97312b8a common/uilib.py --- a/common/uilib.py Tue Dec 02 10:23:11 2008 +0100 +++ b/common/uilib.py Tue Dec 02 11:40:04 2008 +0100 @@ -16,7 +16,6 @@ from urllib import quote as urlquote from cStringIO import StringIO from xml.parsers.expat import ExpatError -from lxml import etree from copy import deepcopy import simplejson @@ -114,7 +113,19 @@ def soup2xhtml(data, encoding): return data - + + def safe_cut(text, length): + """returns a string of length based on , removing any html + tags from given text if cut is necessary.""" + if text is None: + return u'' + text_nohtml = remove_html_tags(text) + # try to keep html tags if text is short enough + if len(text_nohtml) <= length: + return text + # else if un-tagged text is too long, cut it + return text_nohtml[:length-3] + u'...' + else: def soup2xhtml(data, encoding): @@ -130,6 +141,41 @@ # remove and and decode to unicode return body[11:-13].decode(encoding) + def safe_cut(text, length): + """returns an html document of length based on , + and cut is necessary. + """ + if text is None: + return u'' + textParse = etree.HTML(text) + compteur = 0 + + for element in textParse.iter(): + if compteur > length: + parent = element.getparent() + parent.remove(element) + else: + if element.text is not None: + text_resum = text_cut_letters(element.text,length) + len_text_resum = len(''.join(text_resum.split())) + compteur = compteur + len_text_resum + element.text = text_resum + + if element.tail is not None: + if compteur < length: + text_resum = text_cut_letters(element.tail,length) + len_text_resum = len(''.join(text_resum.split())) + compteur = compteur + len_text_resum + element.tail = text_resum + else: + element.tail = '' + + div = etree.HTML('
')[0][0] + listNode = textParse[0].getchildren() + for node in listNode: + div.append(deepcopy(node)) + return etree.tounicode(div) + # HTML generation helper functions ############################################ @@ -170,41 +216,6 @@ params.append('true') return "javascript: replacePageChunk(%s);" % ', '.join(params) -def safe_cut(text, length): - """returns a string of length based on , removing any html - tags from given text if cut is necessary. - """ - if text is None: - return u'' - textParse = etree.HTML(text) - compteur = 0 - - for element in textParse.iter(): - if compteur > length: - parent = element.getparent() - parent.remove(element) - else: - if element.text is not None: - text_resum = text_cut_letters(element.text,length) - len_text_resum = len(''.join(text_resum.split())) - compteur = compteur + len_text_resum - element.text = text_resum - - if element.tail is not None: - if compteur < length: - text_resum = text_cut_letters(element.tail,length) - len_text_resum = len(''.join(text_resum.split())) - compteur = compteur + len_text_resum - element.tail = text_resum - else: - element.tail = '' - - div = etree.HTML('
')[0][0] - listNode = textParse[0].getchildren() - for node in listNode: - div.append(deepcopy(node)) - return etree.tounicode(div) - def text_cut(text, nbwords=30): if text is None: return u''