#
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
"""user interface libraries

contains some functions designed to help implementation of cubicweb user
interface.
"""
|
24 |
24 __docformat__ = "restructuredtext en" |
25 __docformat__ = "restructuredtext en" |
25 |
26 |
26 import csv |
27 import csv |
27 import re |
28 import re |
28 from StringIO import StringIO |
29 from StringIO import StringIO |
121 # else if un-tagged text is too long, cut it |
122 # else if un-tagged text is too long, cut it |
122 return xml_escape(text_nohtml[:length] + u'...') |
123 return xml_escape(text_nohtml[:length] + u'...') |
123 |
124 |
124 fallback_safe_cut = safe_cut |
125 fallback_safe_cut = safe_cut |
125 |
126 |
126 |
127 REM_ROOT_HTML_TAGS = re.compile('</(body|html)>', re.U) |
127 try: |
128 try: |
128 from lxml import etree |
129 from lxml import etree |
129 except (ImportError, AttributeError): |
130 except (ImportError, AttributeError): |
130 # gae environment: lxml not available |
131 # gae environment: lxml not available |
131 pass |
132 pass |
132 else: |
133 else: |
133 |
134 |
134 def soup2xhtml(data, encoding): |
135 def soup2xhtml(data, encoding): |
135 """tidy (at least try) html soup and return the result |
136 """tidy (at least try) html soup and return the result |
|
137 |
136 Note: the function considers a string with no surrounding tag as valid |
138 Note: the function considers a string with no surrounding tag as valid |
137 if <div>`data`</div> can be parsed by an XML parser |
139 if <div>`data`</div> can be parsed by an XML parser |
138 """ |
140 """ |
139 # normalize line break |
141 # remove spurious </body> and </html> tags, then normalize line break |
140 # see http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1 |
142 # (see http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1) |
141 data = u'\n'.join(data.splitlines()) |
143 data = REM_ROOT_HTML_TAGS.sub('', u'\n'.join(data.splitlines())) |
142 # XXX lxml 1.1 support still needed ? |
144 # XXX lxml 1.1 support still needed ? |
143 xmltree = etree.HTML('<div>%s</div>' % data) |
145 xmltree = etree.HTML('<div>%s</div>' % data) |
144 # NOTE: lxml 1.1 (etch platforms) doesn't recognize |
146 # NOTE: lxml 1.1 (etch platforms) doesn't recognize |
145 # the encoding=unicode parameter (lxml 2.0 does), this is |
147 # the encoding=unicode parameter (lxml 2.0 does), this is |
146 # why we specify an encoding and re-decode to unicode later |
148 # why we specify an encoding and re-decode to unicode later |
147 body = etree.tostring(xmltree[0], encoding=encoding) |
149 body = etree.tostring(xmltree[0], encoding=encoding) |
148 # remove <body> and </body> and decode to unicode |
150 # remove <body> and </body> and decode to unicode |
149 return body[11:-13].decode(encoding) |
151 snippet = body[6:-7].decode(encoding) |
|
152 # take care to bad xhtml (for instance starting with </div>) which |
|
153 # may mess with the <div> we added below. Only remove it if it's |
|
154 # still there... |
|
155 if snippet.startswith('<div>') and snippet.endswith('</div>'): |
|
156 snippet = snippet[5:-6] |
|
157 return snippet |
150 |
158 |
151 if hasattr(etree.HTML('<div>test</div>'), 'iter'): |
159 if hasattr(etree.HTML('<div>test</div>'), 'iter'): |
152 |
160 |
153 def safe_cut(text, length): |
161 def safe_cut(text, length): |
154 """returns an html document of length <length> based on <text>, |
162 """returns an html document of length <length> based on <text>, |