14 import locale |
14 import locale |
15 import re |
15 import re |
16 from urllib import quote as urlquote |
16 from urllib import quote as urlquote |
17 from cStringIO import StringIO |
17 from cStringIO import StringIO |
18 from xml.parsers.expat import ExpatError |
18 from xml.parsers.expat import ExpatError |
19 from lxml import etree |
|
20 from copy import deepcopy |
19 from copy import deepcopy |
21 |
20 |
22 import simplejson |
21 import simplejson |
23 |
22 |
24 from mx.DateTime import DateTimeType, DateTimeDeltaType |
23 from mx.DateTime import DateTimeType, DateTimeDeltaType |
112 except ImportError: |
111 except ImportError: |
# GAE environment: lxml not available
114 |
113 |
def soup2xhtml(data, encoding):
    """Degraded fallback used when lxml cannot be imported (e.g. in a GAE
    environment): no tidying is attempted, the markup is returned untouched.
    """
    # without lxml there is nothing we can clean up: pass the soup through
    return data
117 |
116 |
|
def safe_cut(text, length):
    """Return a string of at most `length` characters based on `text`.

    If the tag-stripped version of `text` already fits within `length`,
    `text` is returned unchanged (markup kept); otherwise the stripped
    text is truncated and an ellipsis appended.
    """
    if text is None:
        return u''
    stripped = remove_html_tags(text)
    if len(stripped) > length:
        # even without tags the text is too long: cut it and mark the cut
        return stripped[:length - 3] + u'...'
    # short enough once untagged: keep the original markup
    return text
|
128 |
118 else: |
129 else: |
119 |
130 |
120 def soup2xhtml(data, encoding): |
131 def soup2xhtml(data, encoding): |
121 """tidy (at least try) html soup and return the result |
132 """tidy (at least try) html soup and return the result |
122 Note: the function considers a string with no surrounding tag as valid |
133 Note: the function considers a string with no surrounding tag as valid |
128 # why we specify an encoding and re-decode to unicode later |
139 # why we specify an encoding and re-decode to unicode later |
129 body = etree.tostring(xmltree[0], encoding=encoding) |
140 body = etree.tostring(xmltree[0], encoding=encoding) |
130 # remove <body> and </body> and decode to unicode |
141 # remove <body> and </body> and decode to unicode |
131 return body[11:-13].decode(encoding) |
142 return body[11:-13].decode(encoding) |
132 |
143 |
|
def safe_cut(text, length):
    """Return an html fragment based on `text` keeping at most roughly
    `length` non-whitespace text characters, pruning the markup tree once
    the budget is exhausted.

    :param text: html source string, or None (in which case u'' is returned)
    :param length: maximum number of non-whitespace characters to keep
    """
    if text is None:
        return u''
    textParse = etree.HTML(text)
    # compteur ("counter"): non-whitespace characters kept so far
    compteur = 0

    for element in textParse.iter():
        if compteur > length:
            # budget already exceeded: drop this element entirely
            # NOTE(review): removes elements while iterating the same tree —
            # relies on lxml's iter() tolerating mutation; confirm this is safe.
            parent = element.getparent()
            parent.remove(element)
        else:
            if element.text is not None:
                text_resum = text_cut_letters(element.text,length)
                # whitespace does not count toward the budget
                len_text_resum = len(''.join(text_resum.split()))
                compteur = compteur + len_text_resum
                element.text = text_resum

            if element.tail is not None:
                if compteur < length:
                    text_resum = text_cut_letters(element.tail,length)
                    len_text_resum = len(''.join(text_resum.split()))
                    compteur = compteur + len_text_resum
                    element.tail = text_resum
                else:
                    # budget spent: discard the trailing text
                    element.tail = ''

    # re-root what is left under a fresh <div> and serialize it;
    # etree.HTML('<div></div>')[0][0] digs through <html>/<body> to the <div>
    div = etree.HTML('<div></div>')[0][0]
    # textParse[0] is presumably the <body> wrapper lxml adds — TODO confirm
    # for inputs that parse to a <head>-only document
    listNode = textParse[0].getchildren()
    for node in listNode:
        div.append(deepcopy(node))
    return etree.tounicode(div)
|
178 |
133 |
179 |
134 # HTML generation helper functions ############################################ |
180 # HTML generation helper functions ############################################ |
135 |
181 |
136 from logilab.mtconverter import html_escape |
182 from logilab.mtconverter import html_escape |
137 |
183 |
167 if extraparams: |
213 if extraparams: |
168 params.append(simplejson.dumps(extraparams)) |
214 params.append(simplejson.dumps(extraparams)) |
169 if swap: |
215 if swap: |
170 params.append('true') |
216 params.append('true') |
171 return "javascript: replacePageChunk(%s);" % ', '.join(params) |
217 return "javascript: replacePageChunk(%s);" % ', '.join(params) |
172 |
|
def safe_cut(text, length):
    """Return an html fragment built from `text`, truncated so that at most
    roughly `length` non-whitespace text characters remain; subtrees past the
    budget are removed.

    :param text: html source string, or None (returns u'')
    :param length: maximum number of non-whitespace characters to keep
    """
    if text is None:
        return u''
    textParse = etree.HTML(text)
    # running total of non-whitespace characters already kept
    compteur = 0

    for element in textParse.iter():
        if compteur > length:
            # over budget: prune this element from its parent
            # NOTE(review): mutating the tree while iterating it — depends on
            # lxml iter() behaviour under removal; verify.
            parent = element.getparent()
            parent.remove(element)
        else:
            if element.text is not None:
                text_resum = text_cut_letters(element.text,length)
                # only non-whitespace characters are charged to the budget
                len_text_resum = len(''.join(text_resum.split()))
                compteur = compteur + len_text_resum
                element.text = text_resum

            if element.tail is not None:
                if compteur < length:
                    text_resum = text_cut_letters(element.tail,length)
                    len_text_resum = len(''.join(text_resum.split()))
                    compteur = compteur + len_text_resum
                    element.tail = text_resum
                else:
                    # no budget left for trailing text
                    element.tail = ''

    # wrap surviving children in a fresh <div> and return its serialization;
    # [0][0] unwraps the <html><body> shell lxml builds around '<div></div>'
    div = etree.HTML('<div></div>')[0][0]
    # textParse[0] is presumably the generated <body> — TODO confirm
    listNode = textParse[0].getchildren()
    for node in listNode:
        div.append(deepcopy(node))
    return etree.tounicode(div)
|
207 |
218 |
208 def text_cut(text, nbwords=30): |
219 def text_cut(text, nbwords=30): |
209 if text is None: |
220 if text is None: |
210 return u'' |
221 return u'' |
211 minlength = len(' '.join(text.split()[:nbwords])) |
222 minlength = len(' '.join(text.split()[:nbwords])) |