author | Adrien Di Mascio <Adrien.DiMascio@logilab.fr> |
Thu, 28 May 2009 19:07:06 +0200 | |
branch | stable |
changeset 1992 | b073057c2756 |
parent 1802 | d628defebc17 |
child 1977 | 606923dff11b |
permissions | -rw-r--r-- |
0 | 1 |
"""Objects interacting together to provides the external page embeding |
2 |
functionality. |
|
3 |
||
4 |
||
5 |
:organization: Logilab |
|
631
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
6 |
:copyright: 2001-2009 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
0 | 7 |
:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr |
8 |
""" |
|
9 |
__docformat__ = "restructuredtext en" |
|
10 |
||
11 |
import re |
|
12 |
from urlparse import urljoin |
|
13 |
from urllib2 import urlopen, Request, HTTPError |
|
14 |
||
15 |
from logilab.mtconverter import guess_encoding |
|
16 |
||
17 |
from cubicweb import urlquote # XXX should use view.url_quote method |
|
692
800592b8d39b
replace deprecated cubicweb.common.selectors by its new module path (cubicweb.selectors)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
631
diff
changeset
|
18 |
from cubicweb.selectors import (one_line_rset, score_entity, |
800592b8d39b
replace deprecated cubicweb.common.selectors by its new module path (cubicweb.selectors)
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
631
diff
changeset
|
19 |
match_search_state, implements) |
0 | 20 |
from cubicweb.interfaces import IEmbedable |
800 | 21 |
from cubicweb.view import NOINDEX, NOFOLLOW |
0 | 22 |
from cubicweb.common.uilib import soup2xhtml |
23 |
from cubicweb.web.controller import Controller |
|
24 |
from cubicweb.web.action import Action |
|
25 |
from cubicweb.web.views import basetemplates |
|
26 |
||
27 |
||
28 |
class ExternalTemplate(basetemplates.TheMainTemplate):
    """main template variant wrapping an already fetched external page
    inside the CubicWeb web interface
    """
    id = 'external'

    def call(self, body):
        """write the page header, then `body` as is, then the page footer

        :param body: markup of the external page to display
        """
        # XXX fallback to HTML 4 mode when embeding ?
        self.set_request_content_type()
        # the embedded page is always displayed in 'normal' search state
        self.req.search_state = ('normal',)
        page_title = self.req._('external page')
        noindex_nofollow = [NOINDEX, NOFOLLOW]
        self.template_header(self.content_type, None, page_title,
                             noindex_nofollow)
        self.content_header()
        self.w(body)
        self.content_footer()
        self.template_footer()
|
43 |
||
44 |
||
45 |
class EmbedController(Controller): |
|
46 |
id = 'embed' |
|
47 |
template = 'external' |
|
48 |
||
49 |
def publish(self, rset=None): |
|
50 |
req = self.req |
|
51 |
if 'custom_css' in req.form: |
|
52 |
req.add_css(req.form['custom_css']) |
|
53 |
embedded_url = req.form['url'] |
|
54 |
allowed = self.config['embed-allowed'] |
|
55 |
_ = req._ |
|
56 |
if allowed is None or not allowed.match(embedded_url): |
|
57 |
body = '<h2>%s</h2><h3>%s</h3>' % ( |
|
58 |
_('error while embedding page'), |
|
59 |
_('embedding this url is forbidden')) |
|
60 |
else: |
|
61 |
prefix = req.build_url(self.id, url='') |
|
62 |
authorization = req.get_header('Authorization') |
|
63 |
if authorization: |
|
64 |
headers = {'Authorization' : authorization} |
|
65 |
else: |
|
66 |
headers = {} |
|
67 |
try: |
|
68 |
body = embed_external_page(embedded_url, prefix, |
|
69 |
headers, req.form.get('custom_css')) |
|
70 |
body = soup2xhtml(body, self.req.encoding) |
|
71 |
except HTTPError, err: |
|
72 |
body = '<h2>%s</h2><h3>%s</h3>' % ( |
|
73 |
_('error while embedding page'), err) |
|
1092
b8fbb95dc0eb
process_rql now done in the controller
sylvain.thenault@logilab.fr
parents:
800
diff
changeset
|
74 |
self.process_rql(req.form.get('rql')) |
b8fbb95dc0eb
process_rql now done in the controller
sylvain.thenault@logilab.fr
parents:
800
diff
changeset
|
75 |
return self.vreg.main_template(req, self.template, rset=self.rset, body=body) |
0 | 76 |
|
77 |
||
631
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
78 |
def entity_has_embedable_url(entity):
    """score function telling whether `entity` can be embedded

    Return 1 when `entity.embeded_url()` is a non blank url matching the
    'embed-allowed' pattern found in `entity.config`, else 0. The
    configuration is only looked up once a non blank url has been found.
    """
    candidate = entity.embeded_url()
    if candidate and candidate.strip():
        pattern = entity.config['embed-allowed']
        if pattern is not None and pattern.match(candidate):
            return 1
    return 0
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
87 |
|
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
88 |
|
0 | 89 |
class EmbedAction(Action):
    """display an 'embed' link on entity implementing `embeded_url` method
    if the returned url match embeding configuration
    """
    id = 'embed'
    __select__ = (one_line_rset() & match_search_state('normal')
                  & implements(IEmbedable)
                  & score_entity(entity_has_embedable_url))

    title = _('embed')
    controller = 'embed'

    def url(self, row=0):
        """return the url of the embed controller for the entity at `row`

        The entity's embedable url is resolved against the instance base url
        and the current `rql` form parameter, if any, is propagated.
        """
        entity = self.rset.get_entity(row, 0)
        params = {'url': urljoin(self.req.base_url(), entity.embeded_url())}
        # `in` rather than the deprecated dict.has_key()
        if 'rql' in self.req.form:
            params['rql'] = self.req.form['rql']
        return self.build_url(**params)
|
107 |
||
108 |
||
109 |
||
110 |
# functions doing necessary substitutions to embed an external html page ###### |
|
111 |
||
112 |
||
113 |
# raw strings so that regexp escapes such as `\s` are not interpreted as
# (invalid) string escapes
# content found between the <body> and </body> tags
BODY_RGX = re.compile(r'<body.*?>(.*?)</body>', re.I | re.S | re.U)
# target of links, only when `href` is the first attribute of the <a> tag and
# its value is double-quoted
HREF_RGX = re.compile(r'<a\s+href="([^"]*)"', re.I | re.S | re.U)
# source of images, only when `src` is the first attribute of the <img> tag
# and its value is double-quoted
SRC_RGX = re.compile(r'<img\s+src="([^"]*)"', re.I | re.S | re.U)
|
116 |
||
117 |
||
118 |
class replace_href:
    """regexp substitution callback rewriting `<a href="...">` links so that
    they go through the url given as `prefix`

    :param prefix: string prepended to the quoted link target
    :param custom_css:
      optional stylesheet location, forwarded as a `custom_css` parameter of
      the generated url
    """
    def __init__(self, prefix, custom_css=None):
        self.prefix = prefix
        self.custom_css = custom_css

    def __call__(self, match):
        """return the replacement for `match`, whose first group holds the
        original link target
        """
        original_url = match.group(1)
        url = self.prefix + urlquote(original_url, safe='')
        if self.custom_css is not None:
            if '?' in url:
                separator = '&'
            else:
                separator = '?'
            # quote the stylesheet location like the link target: left as
            # is, its own '?', '&', '#' or '"' characters would alter the
            # generated query string or break out of the href attribute
            url = '%s%scustom_css=%s' % (url, separator,
                                         urlquote(self.custom_css, safe=''))
        return '<a href="%s"' % url
|
132 |
||
631
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
133 |
|
0 | 134 |
class absolutize_links:
    """regexp substitution callback making relative link targets absolute

    :param embedded_url: url of the embedded page, used as base for joining
    :param tag: text written before `="<absolute url>"` in the replacement
    :param custom_css: stored on the instance, not used by `__call__`
    """
    def __init__(self, embedded_url, tag, custom_css=None):
        self.embedded_url = embedded_url
        self.tag = tag
        self.custom_css = custom_css

    def __call__(self, match):
        """return the replacement for `match`, whose first group holds the
        original target; targets containing '://' are left untouched
        """
        target = match.group(1)
        if '://' not in target:
            absolute = urljoin(self.embedded_url, target)
            return '%s="%s"' % (self.tag, absolute)
        return match.group(0)
|
145 |
||
146 |
||
147 |
def prefix_links(body, prefix, embedded_url, custom_css=None):
    """return `body` with its links and images rewritten for embedding

    Three substitutions are applied in sequence: link targets, then image
    sources, are made absolute using `embedded_url` as base, and finally
    link targets are rewritten to go through `prefix`.
    """
    make_href_absolute = absolutize_links(embedded_url, '<a href', custom_css)
    make_src_absolute = absolutize_links(embedded_url, '<img src')
    route_through_prefix = replace_href(prefix, custom_css)
    # order matters: targets must be absolute before being prefixed
    body = HREF_RGX.sub(make_href_absolute, body)
    body = SRC_RGX.sub(make_src_absolute, body)
    return HREF_RGX.sub(route_through_prefix, body)
|
631
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
154 |
|
99f5852f8604
major selector refactoring (mostly to avoid looking for select parameters on the target class), start accept / interface unification)
sylvain.thenault@logilab.fr
parents:
431
diff
changeset
|
155 |
|
0 | 156 |
def embed_external_page(url, prefix, headers=None, custom_css=None):
    """fetch the page at `url` and return its embeddable content as unicode

    :param url: location of the page to fetch
    :param prefix: url prefix through which the page's links are routed
    :param headers: optional dictionary of HTTP headers sent with the request
    :param custom_css: optional stylesheet location propagated to the links

    When the page has a <body> element, only its content is returned, with
    links rewritten by `prefix_links`; otherwise the whole decoded page is
    returned unchanged. Errors raised by `urlopen` are not caught here.
    """
    req = Request(url, headers=(headers or {}))
    response = urlopen(req)
    try:
        content = response.read()
    finally:
        # release the connection even when reading fails
        response.close()
    # undecodable bytes are replaced rather than making the whole page fail
    page_source = unicode(content, guess_encoding(content), 'replace')
    match = BODY_RGX.search(page_source)
    if match is None:
        return page_source
    return prefix_links(match.group(1), prefix, url, custom_css)