author | Sylvain Thénault <sylvain.thenault@logilab.fr> |
Tue, 18 Aug 2009 09:25:26 +0200 | |
changeset 2901 | 8f4d495c94c2 |
parent 2808 | 497424219fb0 |
child 3377 | dd9d292b6a6d |
permissions | -rw-r--r-- |
0 | 1 |
"""Objects interacting together to provide the external page embedding |
2 |
functionality. |
|
3 |
||
4 |
||
5 |
:organization: Logilab |
|
1977
606923dff11b
big bunch of copyright / docstring update
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
1802
diff
changeset
|
6 |
:copyright: 2001-2009 LOGILAB S.A. (Paris, FRANCE), license is LGPL v2. |
0 | 7 |
:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr |
1977
606923dff11b
big bunch of copyright / docstring update
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
1802
diff
changeset
|
8 |
:license: GNU Lesser General Public License, v2.1 - http://www.gnu.org/licenses |
0 | 9 |
""" |
10 |
__docformat__ = "restructuredtext en" |
|
11 |
||
12 |
import re |
|
13 |
from urlparse import urljoin |
|
14 |
from urllib2 import urlopen, Request, HTTPError |
|
2808
497424219fb0
fix urlquote imports
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
2798
diff
changeset
|
15 |
from urllib import quote as urlquote # XXX should use view.url_quote method |
0 | 16 |
|
17 |
from logilab.mtconverter import guess_encoding |
|
18 |
||
692
800592b8d39b
replace deprecated cubicweb.common.selectors by its new module path (cubicweb.selectors)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
631
diff
changeset
|
19 |
from cubicweb.selectors import (one_line_rset, score_entity, |
800592b8d39b
replace deprecated cubicweb.common.selectors by its new module path (cubicweb.selectors)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
631
diff
changeset
|
20 |
match_search_state, implements) |
0 | 21 |
from cubicweb.interfaces import IEmbedable |
800 | 22 |
from cubicweb.view import NOINDEX, NOFOLLOW |
0 | 23 |
from cubicweb.common.uilib import soup2xhtml |
24 |
from cubicweb.web.controller import Controller |
|
25 |
from cubicweb.web.action import Action |
|
26 |
from cubicweb.web.views import basetemplates |
|
27 |
||
28 |
||
29 |
class ExternalTemplate(basetemplates.TheMainTemplate):
    """Main template variant used to show an external web page inside the
    CubicWeb web interface.
    """
    id = 'external'

    def call(self, body):
        """Emit the whole page: template header, content header, `body`
        (written as is through `self.w`), content footer, template footer.
        """
        # XXX fallback to HTML 4 mode when embedding ?
        self.set_request_content_type()
        req = self.req
        req.search_state = ('normal',)
        # NOINDEX / NOFOLLOW are handed to the header as its last argument
        header_extras = [NOINDEX, NOFOLLOW]
        self.template_header(self.content_type, None,
                             req._('external page'), header_extras)
        self.content_header()
        self.w(body)
        self.content_footer()
        self.template_footer()
|
44 |
||
45 |
||
46 |
class EmbedController(Controller): |
|
47 |
id = 'embed' |
|
48 |
template = 'external' |
|
49 |
||
50 |
def publish(self, rset=None): |
|
51 |
req = self.req |
|
52 |
if 'custom_css' in req.form: |
|
53 |
req.add_css(req.form['custom_css']) |
|
54 |
embedded_url = req.form['url'] |
|
55 |
allowed = self.config['embed-allowed'] |
|
56 |
_ = req._ |
|
57 |
if allowed is None or not allowed.match(embedded_url): |
|
58 |
body = '<h2>%s</h2><h3>%s</h3>' % ( |
|
59 |
_('error while embedding page'), |
|
60 |
_('embedding this url is forbidden')) |
|
61 |
else: |
|
62 |
prefix = req.build_url(self.id, url='') |
|
63 |
authorization = req.get_header('Authorization') |
|
64 |
if authorization: |
|
65 |
headers = {'Authorization' : authorization} |
|
66 |
else: |
|
67 |
headers = {} |
|
68 |
try: |
|
69 |
body = embed_external_page(embedded_url, prefix, |
|
70 |
headers, req.form.get('custom_css')) |
|
71 |
body = soup2xhtml(body, self.req.encoding) |
|
72 |
except HTTPError, err: |
|
73 |
body = '<h2>%s</h2><h3>%s</h3>' % ( |
|
74 |
_('error while embedding page'), err) |
|
1092
b8fbb95dc0eb
process_rql now done in the controller
sylvain.thenault@logilab.fr
parents:
800
diff
changeset
|
75 |
self.process_rql(req.form.get('rql')) |
2650
18aec79ec3a3
R [vreg] important refactoring of the vregistry, moving behaviour to end dictionnary (and so leaving room for more flexibility ; keep bw compat ; update api usage in cw
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
1977
diff
changeset
|
76 |
return self.vreg['views'].main_template(req, self.template, |
18aec79ec3a3
R [vreg] important refactoring of the vregistry, moving behaviour to end dictionnary (and so leaving room for more flexibility ; keep bw compat ; update api usage in cw
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
1977
diff
changeset
|
77 |
rset=self.rset, body=body) |
0 | 78 |
|
79 |
||
631
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
80 |
def entity_has_embedable_url(entity):
    """Score function: 1 when `entity.embeded_url()` is a non-blank url
    matching the `embed-allowed` configuration pattern, 0 otherwise.
    """
    candidate = entity.embeded_url()
    if candidate and candidate.strip():
        # the configuration is only looked up once a usable url is there
        pattern = entity.config['embed-allowed']
        if pattern is not None and pattern.match(candidate):
            return 1
    return 0
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
89 |
|
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
90 |
|
0 | 91 |
class EmbedAction(Action):
    """display an 'embed' link on entities implementing the `embeded_url`
    method, if the returned url matches the embedding configuration
    """
    id = 'embed'
    __select__ = (one_line_rset() & match_search_state('normal')
                  & implements(IEmbedable)
                  & score_entity(entity_has_embedable_url))

    title = _('embed')

    def url(self, row=0):
        """Return the 'embed' url for the entity at (`row`, 0) of the
        result set, keeping the `rql` form parameter of the current
        request when there is one.
        """
        entity = self.rset.get_entity(row, 0)
        # the entity's url may be relative: resolve it against the base url
        url = urljoin(self.req.base_url(), entity.embeded_url())
        # membership test instead of the deprecated dict.has_key()
        if 'rql' in self.req.form:
            return self.build_url('embed', url=url, rql=self.req.form['rql'])
        return self.build_url('embed', url=url)
0 | 108 |
|
109 |
||
110 |
||
111 |
# functions doing necessary substitutions to embed an external html page ###### |
|
112 |
||
113 |
||
114 |
# Patterns used to rewrite an external page. They are written as raw strings
# so that `\s` reaches the regexp engine instead of being read as a string
# escape.
# BODY_RGX captures what stands between the <body ...> and </body> tags.
BODY_RGX = re.compile(r'<body.*?>(.*?)</body>', re.I | re.S | re.U)
# HREF_RGX / SRC_RGX capture the target of a double-quoted `href` / `src`
# attribute, only when it directly follows the `<a` / `<img` tag name.
HREF_RGX = re.compile(r'<a\s+href="([^"]*)"', re.I | re.S | re.U)
SRC_RGX = re.compile(r'<img\s+src="([^"]*)"', re.I | re.S | re.U)
|
117 |
||
118 |
||
119 |
class replace_href(object):
    """Regexp substitution callable making a link target go through
    another url.

    :param prefix: url to which the quoted original target is appended
    :param custom_css: optional stylesheet url, propagated to the generated
      link as a `custom_css` parameter
    """
    def __init__(self, prefix, custom_css=None):
        self.prefix = prefix
        self.custom_css = custom_css

    def __call__(self, match):
        """Return the `<a href="...` text replacing `match`, whose first
        group is the original link target.
        """
        original_url = match.group(1)
        # safe='' so that '/', '?', '&'... of the target get quoted as well
        url = self.prefix + urlquote(original_url, safe='')
        if self.custom_css is not None:
            if '?' in url:
                separator = '&'
            else:
                separator = '?'
            # the stylesheet url is quoted too: a raw '&', '#' or '"' in it
            # would corrupt the query string or the href attribute
            url = '%s%scustom_css=%s' % (url, separator,
                                         urlquote(self.custom_css, safe=''))
        return '<a href="%s"' % url
|
133 |
||
631
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
134 |
|
0 | 135 |
class absolutize_links:
    """Regexp substitution callable turning the url captured by a match
    into an absolute one, resolved against `embedded_url`.

    `tag` is the text emitted in front of the `="..."` part (for instance
    '<a href'). `custom_css` is stored but not used by this class.
    """
    def __init__(self, embedded_url, tag, custom_css=None):
        self.embedded_url, self.tag = embedded_url, tag
        self.custom_css = custom_css

    def __call__(self, match):
        """Return the replacement text for `match`, whose first group is
        the url found in the page.
        """
        target = match.group(1)
        if '://' not in target:
            absolute = urljoin(self.embedded_url, target)
            return '%s="%s"' % (self.tag, absolute)
        # url carrying a scheme separator: the matched text is kept as is
        return match.group(0)
|
146 |
||
147 |
||
148 |
def prefix_links(body, prefix, embedded_url, custom_css=None):
    """Return `body` with its links rewritten, in three passes:

    1. `<a href>` targets are resolved against `embedded_url`
    2. `<img src>` targets are resolved against `embedded_url`
    3. `<a href>` targets are re-targeted through `prefix`
       (see `replace_href`)
    """
    make_href_absolute = absolutize_links(embedded_url, '<a href', custom_css)
    make_src_absolute = absolutize_links(embedded_url, '<img src')
    go_through_prefix = replace_href(prefix, custom_css)
    body = HREF_RGX.sub(make_href_absolute, body)
    body = SRC_RGX.sub(make_src_absolute, body)
    return HREF_RGX.sub(go_through_prefix, body)
|
631
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
155 |
|
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
156 |
|
0 | 157 |
def embed_external_page(url, prefix, headers=None, custom_css=None):
    """Fetch the page at `url` and return its content as unicode.

    :param url: address of the external page
    :param prefix: passed to `prefix_links` to rewrite the page's links
    :param headers: optional dictionary of HTTP headers sent with the request
    :param custom_css: optional stylesheet url, passed to `prefix_links`
    :return: the content of the page's <body> element with its links
      rewritten, or the whole decoded page, untouched, when no <body>
      element is found
    :raise: whatever `urlopen` raises (e.g. `HTTPError`)
    """
    req = Request(url, headers=(headers or {}))
    response = urlopen(req)
    try:
        content = response.read()
    finally:
        # release the connection, even when reading fails
        response.close()
    # bytes which can't be decoded are replaced instead of raising
    page_source = unicode(content, guess_encoding(content), 'replace')
    match = BODY_RGX.search(page_source)
    if match is None:
        return page_source
    return prefix_links(match.group(1), prefix, url, custom_css)