|
"""Objects interacting together to provide the external page embedding
functionality.


:organization: Logilab
:copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
"""
|
9 __docformat__ = "restructuredtext en" |
|
10 |
|
11 import re |
|
12 from urlparse import urljoin |
|
13 from urllib2 import urlopen, Request, HTTPError |
|
14 |
|
15 from logilab.mtconverter import guess_encoding |
|
16 |
|
17 from cubicweb import urlquote # XXX should use view.url_quote method |
|
18 from cubicweb.interfaces import IEmbedable |
|
19 from cubicweb.common.uilib import soup2xhtml |
|
20 from cubicweb.common.selectors import (onelinerset_selector, score_entity_selector, |
|
21 searchstate_selector, interface_selector) |
|
22 from cubicweb.common.view import NOINDEX, NOFOLLOW |
|
23 from cubicweb.web.controller import Controller |
|
24 from cubicweb.web.action import Action |
|
25 from cubicweb.web.views import basetemplates |
|
26 |
|
27 |
|
class ExternalTemplate(basetemplates.TheMainTemplate):
    """main template variant used to display the markup of an external
    web page inside the CubicWeb web interface
    """
    id = 'external'

    def call(self, body):
        """write the whole page: template header, content header, the
        given `body` (handed as is to `self.w`), then both footers

        The 'rql' form parameter, when present, is given to `process_rql`
        and the request search state is forced to ('normal',).
        """
        # XXX fallback to HTML 4 mode when embedding ?
        self.set_request_content_type()
        rql = self.req.form.get('rql')
        self.process_rql(rql)
        self.req.search_state = ('normal',)
        content_type = self.content_type
        page_title = self.req._('external page')
        robots_directives = [NOINDEX, NOFOLLOW]
        self.template_header(content_type, None, page_title,
                             robots_directives)
        self.content_header()
        self.w(body)
        self.content_footer()
        self.template_footer()
|
44 |
|
45 |
|
46 class EmbedController(Controller): |
|
47 id = 'embed' |
|
48 template = 'external' |
|
49 |
|
50 def publish(self, rset=None): |
|
51 req = self.req |
|
52 if 'custom_css' in req.form: |
|
53 req.add_css(req.form['custom_css']) |
|
54 embedded_url = req.form['url'] |
|
55 allowed = self.config['embed-allowed'] |
|
56 _ = req._ |
|
57 if allowed is None or not allowed.match(embedded_url): |
|
58 body = '<h2>%s</h2><h3>%s</h3>' % ( |
|
59 _('error while embedding page'), |
|
60 _('embedding this url is forbidden')) |
|
61 else: |
|
62 prefix = req.build_url(self.id, url='') |
|
63 authorization = req.get_header('Authorization') |
|
64 if authorization: |
|
65 headers = {'Authorization' : authorization} |
|
66 else: |
|
67 headers = {} |
|
68 try: |
|
69 body = embed_external_page(embedded_url, prefix, |
|
70 headers, req.form.get('custom_css')) |
|
71 body = soup2xhtml(body, self.req.encoding) |
|
72 except HTTPError, err: |
|
73 body = '<h2>%s</h2><h3>%s</h3>' % ( |
|
74 _('error while embedding page'), err) |
|
75 return self.vreg.main_template(req, self.template, body=body) |
|
76 |
|
77 |
|
class EmbedAction(Action):
    """display an 'embed' link on entities implementing the `embeded_url`
    method, if the returned url matches the embedding configuration
    """
    id = 'embed'
    controller = 'embed'
    __selectors__ = (onelinerset_selector, searchstate_selector,
                     interface_selector, score_entity_selector)
    accepts_interfaces = (IEmbedable,)

    title = _('embed')

    @classmethod
    def score_entity(cls, entity):
        """return 1 if the entity's embeddable url is not blank and matches
        the 'embed-allowed' configuration value, else 0

        (presumably called by `score_entity_selector` -- to be confirmed)
        """
        url = entity.embeded_url()
        if not url or not url.strip():
            return 0
        allowed = cls.config['embed-allowed']
        # None means no embedding pattern has been configured
        if allowed is None or not allowed.match(url):
            return 0
        return 1

    def url(self, row=0):
        """return the url of the embed controller for the entity found at
        (`row`, 0) in the result set

        The entity's url is made absolute using the request's base url and
        the current 'rql' form parameter, if any, is propagated.
        """
        entity = self.rset.get_entity(row, 0)
        url = urljoin(self.req.base_url(), entity.embeded_url())
        # `in` rather than the deprecated dict.has_key, as done in
        # EmbedController.publish
        if 'rql' in self.req.form:
            return self.build_url(url=url, rql=self.req.form['rql'])
        return self.build_url(url=url)
|
109 |
|
110 |
|
111 |
|
112 # functions doing necessary substitutions to embed an external html page ###### |
|
113 |
|
114 |
|
# Raw strings so that `\s` reaches the regexp engine untouched instead of
# being an invalid string escape sequence.
# content of the <body> element (group 1), attributes of the tag ignored
BODY_RGX = re.compile(r'<body.*?>(.*?)</body>', re.I | re.S | re.U)
# double-quoted target (group 1) of an anchor whose first attribute is href
HREF_RGX = re.compile(r'<a\s+href="([^"]*)"', re.I | re.S | re.U)
# double-quoted source (group 1) of an image whose first attribute is src
SRC_RGX = re.compile(r'<img\s+src="([^"]*)"', re.I | re.S | re.U)
|
118 |
|
119 |
|
class replace_href:
    """substitution callback (to be given to a regexp's `sub` method)
    rewriting the target of a matched ``<a href="..."`` so that it goes
    through `prefix`

    :param prefix: string put in front of the quoted original target
    :param custom_css: optional stylesheet url, propagated through a
      `custom_css` query parameter of the rewritten link
    """
    def __init__(self, prefix, custom_css=None):
        self.prefix = prefix
        self.custom_css = custom_css

    def __call__(self, match):
        """return the replacement for `match`, whose first group is expected
        to hold the original link target
        """
        original_url = match.group(1)
        # safe='' so that '/', '?' and '&' of the target get quoted as well
        url = self.prefix + urlquote(original_url, safe='')
        if self.custom_css is not None:
            # quote the stylesheet url like the target: left as is, any
            # '&', '?' or '#' it contains would corrupt the query string
            custom_css = urlquote(self.custom_css, safe='')
            if '?' in url:
                url = '%s&custom_css=%s' % (url, custom_css)
            else:
                url = '%s?custom_css=%s' % (url, custom_css)
        return '<a href="%s"' % url
|
134 |
|
class absolutize_links:
    """substitution callback (to be given to a regexp's `sub` method)
    turning the relative url captured by a match into an absolute one,
    resolved against the address of the embedded page

    :param embedded_url: address of the page the markup comes from
    :param tag: text emitted in front of the ``="<url>"`` part, e.g.
      '<a href'
    :param custom_css: stored on the instance but not used by `__call__`
    """
    def __init__(self, embedded_url, tag, custom_css=None):
        self.custom_css = custom_css
        self.tag = tag
        self.embedded_url = embedded_url

    def __call__(self, match):
        """return the replacement text for `match`, whose first group is
        expected to hold the url
        """
        target = match.group(1)
        if '://' not in target:
            absolute_url = urljoin(self.embedded_url, target)
            return '%s="%s"' % (self.tag, absolute_url)
        # url carrying its own scheme: hand back the matched text as is
        return match.group(0)
|
146 |
|
147 |
|
def prefix_links(body, prefix, embedded_url, custom_css=None):
    """return `body` with its links rewritten for embedding

    Three substitutions are applied in turn:

    1. anchor targets matched by HREF_RGX are made absolute against
       `embedded_url`
    2. image sources matched by SRC_RGX are made absolute the same way
    3. anchor targets are matched again and redirected through `prefix`
       (with `custom_css` propagated) by `replace_href`
    """
    make_anchors_absolute = absolutize_links(embedded_url, '<a href',
                                             custom_css)
    make_images_absolute = absolutize_links(embedded_url, '<img src')
    route_through_prefix = replace_href(prefix, custom_css)
    body = HREF_RGX.sub(make_anchors_absolute, body)
    body = SRC_RGX.sub(make_images_absolute, body)
    return HREF_RGX.sub(route_through_prefix, body)
|
155 |
|
def embed_external_page(url, prefix, headers=None, custom_css=None):
    """fetch the page at `url` and return the markup to embed, as unicode

    The page is requested with the optional `headers` dictionary and
    decoded using the encoding returned by `guess_encoding`, undecodable
    bytes being replaced. When a ``<body>`` element is found, only its
    content is returned, after link rewriting by `prefix_links` (which is
    given `prefix`, `url` and `custom_css`); otherwise the whole decoded
    page is returned with its links left untouched.

    Exceptions raised by `urlopen` are not caught here.
    """
    response = urlopen(Request(url, headers=(headers or {})))
    try:
        content = response.read()
    finally:
        # release the connection right away, even if reading failed
        response.close()
    page_source = unicode(content, guess_encoding(content), 'replace')
    match = BODY_RGX.search(page_source)
    if match is None:
        return page_source
    return prefix_links(match.group(1), prefix, url, custom_css)