178 element.tail = '' |
178 element.tail = '' |
179 text = etree.tounicode(dom[0])[6:-7] # remove wrapping <body></body> |
179 text = etree.tounicode(dom[0])[6:-7] # remove wrapping <body></body> |
180 if add_ellipsis: |
180 if add_ellipsis: |
181 return text + u'...' |
181 return text + u'...' |
182 return text |
182 return text |
183 |
183 |
def text_cut(text, nbwords=30, gotoperiod=True):
    """from the given plain text, return a text with at least <nbwords> words,
    trying to go to the end of the current sentence.

    :param text: the plain text to cut; `None` is accepted and yields an
                 empty unicode string
    :param nbwords: the minimum number of words required
    :param gotoperiod: specifies if the function should try to go to
                       the first period after the cut (i.e. finish
                       the sentence if possible)
    :return: the beginning of the space-normalized text, either cut right
             after the first <nbwords> words or extended up to (and
             including) the next period when `gotoperiod` is true

    Note that spaces are normalized.
    """
    if text is None:
        return u''
    words = text.split()
    # normalize spaces: any run of whitespace becomes a single blank
    text = u' '.join(words)
    # length, in characters, of the first <nbwords> words once normalized
    minlength = len(u' '.join(words[:nbwords]))
    if not gotoperiod:
        return text[:minlength]
    # look for the first period located at or after the minimal cut point;
    # str.find returns -1 when there is none, hence 0 after the increment
    textlength = text.find('.', minlength) + 1
    if textlength == 0:  # no period found
        textlength = minlength
    return text[:textlength]
199 |
205 |
200 def cut(text, length): |
206 def cut(text, length): |
201 """returns a string of a maximum length <length> based on <text> |
207 """returns a string of a maximum length <length> based on <text> |
202 (approximatively, since if text has been cut, '...' is added to the end of the string, |
208 (approximatively, since if text has been cut, '...' is added to the end of the string, |