0
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
1 |
"""Objects interacting together to provides the external page embeding |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
2 |
functionality. |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
3 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
4 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
5 |
:organization: Logilab |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
6 |
:copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
7 |
:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
8 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
9 |
__docformat__ = "restructuredtext en" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
10 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
11 |
import re |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
12 |
from urlparse import urljoin |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
13 |
from urllib2 import urlopen, Request, HTTPError |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
14 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
15 |
from logilab.mtconverter import guess_encoding |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
16 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
17 |
from cubicweb import urlquote # XXX should use view.url_quote method |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
18 |
from cubicweb.interfaces import IEmbedable |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
19 |
from cubicweb.common.uilib import soup2xhtml |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
20 |
from cubicweb.common.selectors import (onelinerset_selector, score_entity_selector, |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
21 |
searchstate_selector, interface_selector) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
22 |
from cubicweb.common.view import NOINDEX, NOFOLLOW |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
23 |
from cubicweb.web.controller import Controller |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
24 |
from cubicweb.web.action import Action |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
25 |
from cubicweb.web.views import basetemplates |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
26 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
27 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
28 |
class ExternalTemplate(basetemplates.TheMainTemplate): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
29 |
"""template embeding an external web pages into CubicWeb web interface |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
30 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
31 |
id = 'external' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
32 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
33 |
def call(self, body): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
34 |
# XXX fallback to HTML 4 mode when embeding ? |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
35 |
self.set_request_content_type() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
36 |
self.process_rql(self.req.form.get('rql')) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
37 |
self.req.search_state = ('normal',) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
38 |
self.template_header(self.content_type, None, self.req._('external page'), |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
39 |
[NOINDEX, NOFOLLOW]) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
40 |
self.content_header() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
41 |
self.w(body) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
42 |
self.content_footer() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
43 |
self.template_footer() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
44 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
45 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
46 |
class EmbedController(Controller): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
47 |
id = 'embed' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
48 |
template = 'external' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
49 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
50 |
def publish(self, rset=None): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
51 |
req = self.req |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
52 |
if 'custom_css' in req.form: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
53 |
req.add_css(req.form['custom_css']) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
54 |
embedded_url = req.form['url'] |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
55 |
allowed = self.config['embed-allowed'] |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
56 |
_ = req._ |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
57 |
if allowed is None or not allowed.match(embedded_url): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
58 |
body = '<h2>%s</h2><h3>%s</h3>' % ( |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
59 |
_('error while embedding page'), |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
60 |
_('embedding this url is forbidden')) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
61 |
else: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
62 |
prefix = req.build_url(self.id, url='') |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
63 |
authorization = req.get_header('Authorization') |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
64 |
if authorization: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
65 |
headers = {'Authorization' : authorization} |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
66 |
else: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
67 |
headers = {} |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
68 |
try: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
69 |
body = embed_external_page(embedded_url, prefix, |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
70 |
headers, req.form.get('custom_css')) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
71 |
body = soup2xhtml(body, self.req.encoding) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
72 |
except HTTPError, err: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
73 |
body = '<h2>%s</h2><h3>%s</h3>' % ( |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
74 |
_('error while embedding page'), err) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
75 |
return self.vreg.main_template(req, self.template, body=body) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
76 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
77 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
78 |
class EmbedAction(Action): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
79 |
"""display an 'embed' link on entity implementing `embeded_url` method |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
80 |
if the returned url match embeding configuration |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
81 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
82 |
id = 'embed' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
83 |
controller = 'embed' |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
84 |
__selectors__ = (onelinerset_selector, searchstate_selector, |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
85 |
interface_selector, score_entity_selector) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
86 |
accepts_interfaces = (IEmbedable,) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
87 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
88 |
title = _('embed') |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
89 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
90 |
@classmethod |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
91 |
def score_entity(cls, entity): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
92 |
"""return a score telling how well I can display the given |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
93 |
entity instance (required by the value_selector) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
94 |
""" |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
95 |
url = entity.embeded_url() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
96 |
if not url or not url.strip(): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
97 |
return 0 |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
98 |
allowed = cls.config['embed-allowed'] |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
99 |
if allowed is None or not allowed.match(url): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
100 |
return 0 |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
101 |
return 1 |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
102 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
103 |
def url(self, row=0): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
104 |
entity = self.rset.get_entity(row, 0) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
105 |
url = urljoin(self.req.base_url(), entity.embeded_url()) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
106 |
if self.req.form.has_key('rql'): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
107 |
return self.build_url(url=url, rql=self.req.form['rql']) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
108 |
return self.build_url(url=url) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
109 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
110 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
111 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
112 |
# functions doing necessary substitutions to embed an external html page ###### |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
113 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
114 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
115 |
BODY_RGX = re.compile('<body.*?>(.*?)</body>', re.I | re.S | re.U) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
116 |
HREF_RGX = re.compile('<a\s+href="([^"]*)"', re.I | re.S | re.U) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
117 |
SRC_RGX = re.compile('<img\s+src="([^"]*)"', re.I | re.S | re.U) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
118 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
119 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
120 |
class replace_href: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
121 |
def __init__(self, prefix, custom_css=None): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
122 |
self.prefix = prefix |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
123 |
self.custom_css = custom_css |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
124 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
125 |
def __call__(self, match): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
126 |
original_url = match.group(1) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
127 |
url = self.prefix + urlquote(original_url, safe='') |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
128 |
if self.custom_css is not None: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
129 |
if '?' in url: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
130 |
url = '%s&custom_css=%s' % (url, self.custom_css) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
131 |
else: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
132 |
url = '%s?custom_css=%s' % (url, self.custom_css) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
133 |
return '<a href="%s"' % url |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
134 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
135 |
class absolutize_links: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
136 |
def __init__(self, embedded_url, tag, custom_css=None): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
137 |
self.embedded_url = embedded_url |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
138 |
self.tag = tag |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
139 |
self.custom_css = custom_css |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
140 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
141 |
def __call__(self, match): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
142 |
original_url = match.group(1) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
143 |
if '://' in original_url: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
144 |
return match.group(0) # leave it unchanged |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
145 |
return '%s="%s"' % (self.tag, urljoin(self.embedded_url, original_url)) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
146 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
147 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
148 |
def prefix_links(body, prefix, embedded_url, custom_css=None): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
149 |
filters = ((HREF_RGX, absolutize_links(embedded_url, '<a href', custom_css)), |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
150 |
(SRC_RGX, absolutize_links(embedded_url, '<img src')), |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
151 |
(HREF_RGX, replace_href(prefix, custom_css))) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
152 |
for rgx, repl in filters: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
153 |
body = rgx.sub(repl, body) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
154 |
return body |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
155 |
|
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
156 |
def embed_external_page(url, prefix, headers=None, custom_css=None): |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
157 |
req = Request(url, headers=(headers or {})) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
158 |
content = urlopen(req).read() |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
159 |
page_source = unicode(content, guess_encoding(content), 'replace') |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
160 |
page_source =page_source |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
161 |
match = BODY_RGX.search(page_source) |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
162 |
if match is None: |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
163 |
return page_source |
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff
changeset
|
164 |
return prefix_links(match.group(1), prefix, url, custom_css) |