author | Sylvain Thénault <sylvain.thenault@logilab.fr> |
Wed, 12 May 2010 17:07:07 +0200 | |
branch | stable |
changeset 5524 | 2f3d4ca6f7f8 |
parent 5424 | 8ecbcbff9777 |
child 5556 | 9ab2b4c74baf |
child 5886 | 00a78298d30d |
permissions | -rw-r--r-- |
5421
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
1 |
# copyright 2003-2010 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
2 |
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
3 |
# |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
4 |
# This file is part of CubicWeb. |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
5 |
# |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
6 |
# CubicWeb is free software: you can redistribute it and/or modify it under the |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
7 |
# terms of the GNU Lesser General Public License as published by the Free |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
8 |
# Software Foundation, either version 2.1 of the License, or (at your option) |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
9 |
# any later version. |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
10 |
# |
5424
8ecbcbff9777
replace logilab-common by CubicWeb in disclaimer
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
5421
diff
changeset
|
11 |
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT |
5421
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
12 |
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
13 |
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
14 |
# details. |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
15 |
# |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
16 |
# You should have received a copy of the GNU Lesser General Public License along |
8167de96c523
proper licensing information (LGPL-2.1). Hope I get it right this time.
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
4252
diff
changeset
|
17 |
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>. |
0 | 18 |
"""Objects interacting together to provides the external page embeding |
19 |
functionality. |
|
20 |
||
21 |
||
22 |
""" |
|
23 |
__docformat__ = "restructuredtext en" |
|
24 |
||
25 |
import re |
|
26 |
from urlparse import urljoin |
|
27 |
from urllib2 import urlopen, Request, HTTPError |
|
2808
497424219fb0
fix urlquote imports
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
2798
diff
changeset
|
28 |
from urllib import quote as urlquote # XXX should use view.url_quote method |
0 | 29 |
|
30 |
from logilab.mtconverter import guess_encoding |
|
31 |
||
692
800592b8d39b
replace deprecated cubicweb.common.selectors by its new module path (cubicweb.selectors)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
631
diff
changeset
|
32 |
from cubicweb.selectors import (one_line_rset, score_entity, |
800592b8d39b
replace deprecated cubicweb.common.selectors by its new module path (cubicweb.selectors)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
631
diff
changeset
|
33 |
match_search_state, implements) |
0 | 34 |
from cubicweb.interfaces import IEmbedable |
800 | 35 |
from cubicweb.view import NOINDEX, NOFOLLOW |
4023
eae23c40627a
drop common subpackage
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
3460
diff
changeset
|
36 |
from cubicweb.uilib import soup2xhtml |
0 | 37 |
from cubicweb.web.controller import Controller |
38 |
from cubicweb.web.action import Action |
|
39 |
from cubicweb.web.views import basetemplates |
|
40 |
||
41 |
||
42 |
class ExternalTemplate(basetemplates.TheMainTemplate):
    """Main template variant used to show an external web page inside the
    CubicWeb web interface.
    """
    __regid__ = 'external'

    def call(self, body):
        """Emit a complete page whose content area is `body`.

        `body` is handed unchanged to `self.w`, between the content header
        and the content footer of the main template.
        """
        # XXX fallback to HTML 4 mode when embedding ?
        self.set_request_content_type()
        request = self._cw
        request.search_state = ('normal',)
        page_title = request._('external page')
        self.template_header(self.content_type, None, page_title,
                             [NOINDEX, NOFOLLOW])
        self.content_header()
        self.w(body)
        self.content_footer()
        self.template_footer()
|
57 |
||
58 |
||
59 |
class EmbedController(Controller):
    """Controller registered as 'embed': fetch the page designated by the
    `url` form parameter and render it through the 'external' template.
    """
    __regid__ = 'embed'
    template = 'external'

    def publish(self, rset=None):
        """Return the rendering of the embedded page (or of an error message).

        Form parameters read here:

        * `url`: address of the page to embed (mandatory, a missing value
          raises `KeyError`)
        * `custom_css`: optional stylesheet, added to the request and
          propagated to the rewritten links
        * `rql`: optional, handed to `self.process_rql`

        The page is only fetched when the 'embed-allowed' configuration value
        is set and matches `url`; otherwise an error message is rendered
        instead. `rset` is accepted for interface compatibility but is not
        used by this method.
        """
        req = self._cw
        if 'custom_css' in req.form:
            req.add_css(req.form['custom_css'])
        embedded_url = req.form['url']
        allowed = self._cw.vreg.config['embed-allowed']
        _ = req._
        # single definition of the markup shared by both error cases
        error_markup = '<h2>%s</h2><h3>%s</h3>'
        if allowed is None or not allowed.match(embedded_url):
            body = error_markup % (_('error while embedding page'),
                                   _('embedding this url is forbidden'))
        else:
            # links of the embedded page are rewritten to come back here
            prefix = req.build_url(self.__regid__, url='')
            # forward the credentials of the incoming request, if any
            headers = {}
            authorization = req.get_header('Authorization')
            if authorization:
                headers['Authorization'] = authorization
            try:
                body = embed_external_page(embedded_url, prefix,
                                           headers, req.form.get('custom_css'))
                body = soup2xhtml(body, self._cw.encoding)
            except HTTPError as err:
                # NOTE(review): `err` is interpolated without HTML escaping;
                # confirm the remote status line cannot carry markup.
                body = error_markup % (_('error while embedding page'), err)
        self.process_rql(req.form.get('rql'))
        return self._cw.vreg['views'].main_template(req, self.template,
                                                    rset=self.cw_rset, body=body)
0 | 91 |
|
92 |
||
631
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
93 |
def entity_has_embedable_url(entity):
    """Tell whether `entity` gives a url that may be embedded.

    Return 1 when `entity.embeded_url()` is a non-blank url matched by the
    'embed-allowed' configuration value, 0 in every other case (no url,
    blank url, no 'embed-allowed' value or url not matched by it).
    """
    candidate = entity.embeded_url()
    if candidate and candidate.strip():
        # the configuration is only looked up once a usable url is known
        allowed = entity._cw.vreg.config['embed-allowed']
        if allowed is not None and allowed.match(candidate):
            return 1
    return 0
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
102 |
|
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
103 |
|
0 | 104 |
class EmbedAction(Action):
    """Display an 'embed' link on entities implementing the `embeded_url`
    method, if the returned url matches the embedding configuration.
    """
    __regid__ = 'embed'
    __select__ = (one_line_rset() & match_search_state('normal')
                  & implements(IEmbedable)
                  & score_entity(entity_has_embedable_url))

    title = _('embed')

    def url(self, row=0):
        """Return the url of the 'embed' controller for the entity at `row`.

        The entity's embedable url is resolved against the instance base url
        and given as the `url` parameter. When the current request form has
        an `rql` value, it is propagated as well.
        """
        entity = self.cw_rset.get_entity(row, 0)
        params = {'url': urljoin(self._cw.base_url(), entity.embeded_url())}
        # `in` rather than the deprecated dict.has_key(), as done by the
        # controller in this module
        if 'rql' in self._cw.form:
            params['rql'] = self._cw.form['rql']
        return self._cw.build_url('embed', **params)
0 | 121 |
|
122 |
||
123 |
||
124 |
# functions doing necessary substitutions to embed an external html page ###### |
|
125 |
||
126 |
||
127 |
# Patterns are raw strings so that `\s` reaches the regexp engine untouched
# instead of being an invalid string escape.

# content of the <body> element, whatever the attributes of the opening tag
BODY_RGX = re.compile(r'<body.*?>(.*?)</body>', re.I | re.S | re.U)
# target of <a> tags; only matches when a double-quoted `href` is the first
# attribute of the tag
HREF_RGX = re.compile(r'<a\s+href="([^"]*)"', re.I | re.S | re.U)
# source of <img> tags; only matches when a double-quoted `src` is the first
# attribute of the tag
SRC_RGX = re.compile(r'<img\s+src="([^"]*)"', re.I | re.S | re.U)
|
130 |
||
131 |
||
132 |
class replace_href(object):
    """Substitution callback rewriting the target of `<a href="...">` tags so
    that they point to `prefix` followed by the quoted original url.

    Instances are meant to be given to `sub()` of a regular expression whose
    first group is the link target (see `prefix_links`).
    """

    def __init__(self, prefix, custom_css=None):
        """`prefix` is the url to which the quoted target is appended,
        `custom_css` an optional stylesheet url added as the `custom_css`
        parameter of every rewritten link.
        """
        self.prefix = prefix
        self.custom_css = custom_css

    @staticmethod
    def _quote(value):
        """Return `value` quoted for use as a url parameter value.

        Unicode strings are encoded to UTF-8 first: Python 2's `quote` fails
        on non-ASCII unicode input.
        """
        if isinstance(value, type(u'')):
            value = value.encode('utf-8')
        return urlquote(value, safe='')

    def __call__(self, match):
        """Return the replacement text for `match` (start of an <a> tag)."""
        original_url = match.group(1)
        url = self.prefix + self._quote(original_url)
        if self.custom_css is not None:
            if '?' in url:
                separator = '&'
            else:
                separator = '?'
            # the stylesheet url is quoted as well, else a `&`, `?` or `"` in
            # it would corrupt the generated link
            url = '%s%scustom_css=%s' % (url, separator,
                                         self._quote(self.custom_css))
        return '<a href="%s"' % url
|
146 |
||
631
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
147 |
|
0 | 148 |
class absolutize_links(object):
    """Substitution callback turning relative urls found in the embedded page
    into absolute ones, using the url of that page as base.

    Instances are meant to be given to `sub()` of a regular expression whose
    first group is the url (see `prefix_links`).
    """

    def __init__(self, embedded_url, tag, custom_css=None):
        """`embedded_url` is the url of the embedded page, `tag` the text
        emitted before `="<url>"` in rewritten matches (e.g. '<a href').
        `custom_css` is stored but not used by this class.
        """
        self.embedded_url = embedded_url
        self.tag = tag
        self.custom_css = custom_css

    def __call__(self, match):
        """Return the replacement text for `match`."""
        original_url = match.group(1)
        # Let urljoin decide what is relative: a plain `'://' in url` test is
        # fooled by relative links carrying a url in their query string
        # (e.g. "go?next=http://...").
        absolute_url = urljoin(self.embedded_url, original_url)
        if absolute_url == original_url:
            return match.group(0) # leave it unchanged
        return '%s="%s"' % (self.tag, absolute_url)
|
159 |
||
160 |
||
161 |
def prefix_links(body, prefix, embedded_url, custom_css=None):
    """Return `body` with its links rewritten for embedding.

    Three substitutions are applied, in this order:

    1. targets of <a> tags are made absolute, relative to `embedded_url`
    2. sources of <img> tags are made absolute, relative to `embedded_url`
    3. targets of <a> tags are replaced by `prefix` + quoted target (plus the
       `custom_css` parameter when given)
    """
    # step 3 works on the output of step 1, hence the ordering
    body = HREF_RGX.sub(absolutize_links(embedded_url, '<a href', custom_css),
                        body)
    body = SRC_RGX.sub(absolutize_links(embedded_url, '<img src'), body)
    return HREF_RGX.sub(replace_href(prefix, custom_css), body)
|
631
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
168 |
|
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
169 |
|
0 | 170 |
def embed_external_page(url, prefix, headers=None, custom_css=None):
    """Fetch the page at `url` and return its content prepared for embedding.

    :param url: address of the page to fetch
    :param prefix: url prepended to the (quoted) target of the page's links
    :param headers: optional dictionary of HTTP headers sent with the request
    :param custom_css: optional stylesheet url propagated to rewritten links
    :return: the content of the page's <body> element with links rewritten by
      `prefix_links`, or the whole decoded page, left untouched, when no
      <body> element is found

    Errors raised while opening or reading `url` (e.g. `HTTPError`) are not
    caught here.
    """
    req = Request(url, headers=(headers or {}))
    response = urlopen(req)
    try:
        content = response.read()
    finally:
        # do not leave the connection open until garbage collection
        response.close()
    # undecodable bytes are replaced rather than raising an error
    page_source = unicode(content, guess_encoding(content), 'replace')
    match = BODY_RGX.search(page_source)
    if match is None:
        return page_source
    return prefix_links(match.group(1), prefix, url, custom_css)