163 element.tail = '' |
163 element.tail = '' |
164 text = etree.tounicode(dom[0])[6:-7] # remove wrapping <body></body> |
164 text = etree.tounicode(dom[0])[6:-7] # remove wrapping <body></body> |
165 if add_ellipsis: |
165 if add_ellipsis: |
166 return text + u'...' |
166 return text + u'...' |
167 return text |
167 return text |
168 |
168 |
def text_cut(text, nbwords=30, gotoperiod=True):
    """from the given plain text, return a text with at least <nbwords> words,
    trying to go to the end of the current sentence.

    :param text: the plain text to cut; None is treated as an empty text
    :param nbwords: the minimum number of words required
    :param gotoperiod: specifies if the function should try to go to
                       the first period after the cut (i.e. finish
                       the sentence if possible)
    :return: the beginning of the space-normalized text: the first
             <nbwords> words, extended up to and including the next
             period when <gotoperiod> is true and such a period exists

    Note that spaces are normalized.
    """
    if text is None:
        return u''
    words = text.split()
    text = u' '.join(words)  # normalize spaces
    # length of the shortest acceptable result: the first <nbwords> words
    minlength = len(u' '.join(words[:nbwords]))
    if not gotoperiod:
        return text[:minlength]
    # Start searching on the last kept character rather than just after it:
    # if the last required word already ends with a period, the current
    # sentence is complete and we must not run on to the end of the next one.
    period = text.find('.', max(minlength - 1, 0))
    if period == -1:  # no period found
        return text[:minlength]
    return text[:period + 1]
184 |
190 |
185 def cut(text, length): |
191 def cut(text, length): |
186 """returns a string of a maximum length <length> based on <text> |
192 """returns a string of a maximum length <length> based on <text> |
187 (approximatively, since if text has been cut, '...' is added to the end of the string, |
193 (approximatively, since if text has been cut, '...' is added to the end of the string, |