safe_cut function returns a summary from HTML text.
author Laure Bourgois <Laure.Bourgois@logilab.fr>
Mon, 01 Dec 2008 14:51:12 +0100
changeset 159 ff7b0f8dcb3c
parent 158 18385a9bbba0
child 160 cf9ab83b1cf3
safe_cut function returns a summary from HTML text. safe_cut uses the function text_cut_letters, which cuts text at a given number of letters.
common/uilib.py
--- a/common/uilib.py	Mon Dec 01 14:46:30 2008 +0100
+++ b/common/uilib.py	Mon Dec 01 14:51:12 2008 +0100
@@ -168,18 +168,43 @@
         params.append('true')
     return "javascript: replacePageChunk(%s);" % ', '.join(params)
 
+from lxml import etree
+from copy import deepcopy
+
 def safe_cut(text, length):
     """returns a string of length <length> based on <text>, removing any html
     tags from given text if cut is necessary.
     """
     if text is None:
         return u''
-    text_nohtml = remove_html_tags(text)
-    # try to keep html tags if text is short enough
-    if len(text_nohtml) <= length:
-        return text
-    # else if un-tagged text is too long, cut it
-    return text_nohtml[:length-3] + u'...'
+    textParse = etree.HTML(text)
+    compteur = 0
+
+    for element in textParse.iter():
+         if compteur > length:
+             parent = element.getparent()
+             parent.remove(element)
+         else:
+             if element.text is not None:
+                 text_resum = text_cut_letters(element.text,length)
+                 len_text_resum = len(''.join(text_resum.split()))
+                 compteur = compteur + len_text_resum
+                 element.text = text_resum
+                         
+             if element.tail is not None:
+                 if compteur < length:
+                     text_resum = text_cut_letters(element.tail,length)
+                     len_text_resum = len(''.join(text_resum.split()))
+                     compteur = compteur + len_text_resum
+                     element.tail = text_resum
+                 else:
+                     element.tail = ''
+                     
+    div = etree.HTML('<div></div>')[0][0]
+    listNode = textParse[0].getchildren()
+    for node in listNode:
+         div.append(deepcopy(node))
+    return etree.tounicode(div)
 
 def text_cut(text, nbwords=30):
     if text is None:
@@ -190,6 +215,19 @@
         textlength = minlength 
     return text[:textlength]
 
+def text_cut_letters(text, nbletters):
+    if text is None:
+        return u''
+    if len(''.join(text.split())) <= nbletters:
+           return text
+    else:
+        text_nospace = ''.join(text.split())
+        textlength=text.find('.') + 1
+
+        if textlength==0:
+           textlength=text.find(' ', nbletters+5)
+           
+        return text[:textlength] 
 
 def cut(text, length):
     """returns a string of length <length> based on <text>