The safe_cut function now returns a summary built from HTML text.
safe_cut uses the new text_cut_letters function, which cuts a text to a given number of letters.
--- a/common/uilib.py Mon Dec 01 14:46:30 2008 +0100
+++ b/common/uilib.py Mon Dec 01 14:51:12 2008 +0100
@@ -168,18 +168,43 @@
params.append('true')
return "javascript: replacePageChunk(%s);" % ', '.join(params)
+from lxml import etree
+from copy import deepcopy
+
def safe_cut(text, length):
"""returns a string of length <length> based on <text>, removing any html
tags from given text if cut is necessary.
"""
if text is None:
return u''
- text_nohtml = remove_html_tags(text)
- # try to keep html tags if text is short enough
- if len(text_nohtml) <= length:
- return text
- # else if un-tagged text is too long, cut it
- return text_nohtml[:length-3] + u'...'
+ # NOTE(review): the docstring above still describes the removed
+ # implementation; the code below keeps the markup, prunes the parsed
+ # tree and returns it serialized inside a <div>.
+ # parse the markup with lxml's HTML parser
+ textParse = etree.HTML(text)
+ # running total of non-whitespace characters kept so far
+ compteur = 0
+
+ # NOTE(review): elements are removed from the tree while it is being
+ # iterated -- confirm the lxml iterator still visits following siblings.
+ for element in textParse.iter():
+ # budget already exceeded: detach this element from its parent
+ if compteur > length:
+ parent = element.getparent()
+ parent.remove(element)
+ else:
+ if element.text is not None:
+ # NOTE(review): the full <length> is passed for every node, not
+ # the remaining budget, so the total kept can exceed <length>.
+ text_resum = text_cut_letters(element.text,length)
+ # count only the non-whitespace characters of what was kept
+ len_text_resum = len(''.join(text_resum.split()))
+ compteur = compteur + len_text_resum
+ element.text = text_resum
+
+ # tail = text that follows the element's closing tag
+ if element.tail is not None:
+ # NOTE(review): strict '<' here versus strict '>' in the removal
+ # test above; the two checks disagree when compteur == length.
+ if compteur < length:
+ text_resum = text_cut_letters(element.tail,length)
+ len_text_resum = len(''.join(text_resum.split()))
+ compteur = compteur + len_text_resum
+ element.tail = text_resum
+ else:
+ # budget used up: drop the trailing text
+ element.tail = ''
+
+ # fresh, empty <div> used as the container of the result
+ # (presumably html > body > div in the parsed tree -- TODO confirm)
+ div = etree.HTML('<div></div>')[0][0]
+ # children of the root's first child (presumably <body> -- TODO confirm)
+ listNode = textParse[0].getchildren()
+ for node in listNode:
+ # append copies so the parsed tree itself is left untouched
+ div.append(deepcopy(node))
+ # serialize the container and its content to a unicode string
+ return etree.tounicode(div)
def text_cut(text, nbwords=30):
if text is None:
@@ -190,6 +215,19 @@
textlength = minlength
return text[:textlength]
+def text_cut_letters(text, nbletters):
+    """Shorten <text> when it holds more than <nbletters> letters.
+
+    Whitespace is not counted as letters.  A text within the limit is
+    returned unchanged, and None yields u''.  A longer text is cut right
+    after its first '.', wherever that is; when it contains no '.', it
+    is cut at the first space found at or after index nbletters + 5.
+    When no such space exists the text is returned whole.
+    """
+    if text is None:
+        return u''
+    if len(''.join(text.split())) <= nbletters:
+        return text
+    # prefer ending the summary on the first sentence boundary
+    textlength = text.find('.') + 1
+    if textlength == 0:
+        # no sentence end: look for a word boundary a little past the limit
+        textlength = text.find(' ', nbletters + 5)
+        if textlength == -1:
+            # str.find() reports "not found" as -1, and text[:-1] would
+            # silently drop the final character
+            return text
+    return text[:textlength]
def cut(text, length):
"""returns a string of length <length> based on <text>