--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/web/views/embedding.py Wed Nov 05 15:52:50 2008 +0100
@@ -0,0 +1,164 @@
+"""Objects interacting together to provide the external page embedding
+functionality.
+
+
+:organization: Logilab
+:copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
+"""
+__docformat__ = "restructuredtext en"
+
+import re
+from urlparse import urljoin
+from urllib2 import urlopen, Request, HTTPError
+
+from logilab.mtconverter import guess_encoding
+
+from cubicweb import urlquote # XXX should use view.url_quote method
+from cubicweb.interfaces import IEmbedable
+from cubicweb.common.uilib import soup2xhtml
+from cubicweb.common.selectors import (onelinerset_selector, score_entity_selector,
+ searchstate_selector, interface_selector)
+from cubicweb.common.view import NOINDEX, NOFOLLOW
+from cubicweb.web.controller import Controller
+from cubicweb.web.action import Action
+from cubicweb.web.views import basetemplates
+
+
+class ExternalTemplate(basetemplates.TheMainTemplate):
+    """main template variant used to display an external web page inside
+    the CubicWeb web interface
+    """
+    id = 'external'
+
+    def call(self, body):
+        """write the template header, the content header, `body` as given,
+        then the content and template footers
+
+        :param body: markup of the embedded page, handed to `self.w` unchanged
+        """
+        # XXX fallback to HTML 4 mode when embedding ?
+        self.set_request_content_type()
+        rql = self.req.form.get('rql')
+        self.process_rql(rql)
+        self.req.search_state = ('normal',)
+        content_type = self.content_type
+        page_title = self.req._('external page')
+        self.template_header(content_type, None, page_title,
+                             [NOINDEX, NOFOLLOW])
+        self.content_header()
+        self.w(body)
+        self.content_footer()
+        self.template_footer()
+
+
+class EmbedController(Controller):
+    """controller fetching an external page and rendering it through the
+    'external' template
+
+    Form parameters read from the request:
+
+    :url: address of the page to embed, checked against the `embed-allowed`
+          configuration entry
+    :custom_css: optional stylesheet, registered with `req.add_css` and
+                 handed to `embed_external_page`
+    """
+    id = 'embed'
+    template = 'external'
+
+    def publish(self, rset=None):
+        """return the main template output wrapping either the embedded
+        page or an error message (url missing or not allowed, or fetch
+        failing with an `HTTPError`)
+        """
+        req = self.req
+        _ = req._
+        if 'custom_css' in req.form:
+            req.add_css(req.form['custom_css'])
+        # a request without `url` used to fail with a KeyError; it is now
+        # reported with the same error page as a non-allowed url
+        embedded_url = req.form.get('url')
+        allowed = self.config['embed-allowed']
+        if (not embedded_url or allowed is None
+            or not allowed.match(embedded_url)):
+            body = '<h2>%s</h2><h3>%s</h3>' % (
+                _('error while embedding page'),
+                _('embedding this url is forbidden'))
+        else:
+            prefix = req.build_url(self.id, url='')
+            # NOTE(review): the client's Authorization header is forwarded
+            # to the embedded site -- `embed-allowed` should only match
+            # hosts trusted with these credentials
+            authorization = req.get_header('Authorization')
+            if authorization:
+                headers = {'Authorization' : authorization}
+            else:
+                headers = {}
+            try:
+                body = embed_external_page(embedded_url, prefix,
+                                           headers, req.form.get('custom_css'))
+                body = soup2xhtml(body, self.req.encoding)
+            except HTTPError, err:
+                # NOTE(review): `err` is inserted without html escaping and
+                # this branch does not go through soup2xhtml -- confirm the
+                # error text cannot carry markup
+                body = '<h2>%s</h2><h3>%s</h3>' % (
+                    _('error while embedding page'), err)
+        return self.vreg.main_template(req, self.template, body=body)
+
+
+class EmbedAction(Action):
+    """display an 'embed' link on entities implementing `IEmbedable` when
+    the url returned by their `embeded_url` method matches the embedding
+    configuration
+    """
+    id = 'embed'
+    controller = 'embed'
+    __selectors__ = (onelinerset_selector, searchstate_selector,
+                     interface_selector, score_entity_selector)
+    accepts_interfaces = (IEmbedable,)
+
+    title = _('embed')
+
+    @classmethod
+    def score_entity(cls, entity):
+        """return 1 when the entity's embedded url is not blank and matches
+        the `embed-allowed` configuration entry, 0 otherwise (presumably
+        called by `score_entity_selector`)
+        """
+        url = entity.embeded_url()
+        if not url or not url.strip():
+            return 0
+        allowed = cls.config['embed-allowed']
+        if allowed is None or not allowed.match(url):
+            return 0
+        return 1
+
+    def url(self, row=0):
+        """return the url built by `build_url` for the entity at `row`: its
+        embedded url made absolute against the site base url, plus the
+        current `rql` form parameter when there is one
+        """
+        entity = self.rset.get_entity(row, 0)
+        url = urljoin(self.req.base_url(), entity.embeded_url())
+        # membership test instead of the deprecated has_key(), as done for
+        # 'custom_css' elsewhere in this module
+        if 'rql' in self.req.form:
+            return self.build_url(url=url, rql=self.req.form['rql'])
+        return self.build_url(url=url)
+
+
+
+# functions doing necessary substitutions to embed an external html page ######
+
+
+# patterns are raw strings so that `\s` reaches the regexp engine untouched
+
+# group 1: everything between the opening <body ...> tag and </body>
+BODY_RGX = re.compile(r'<body.*?>(.*?)</body>', re.I | re.S | re.U)
+# group 1: value of a double-quoted href / src attribute written right
+# after the tag name (other attribute orders or quoting are not matched)
+HREF_RGX = re.compile(r'<a\s+href="([^"]*)"', re.I | re.S | re.U)
+SRC_RGX = re.compile(r'<img\s+src="([^"]*)"', re.I | re.S | re.U)
+
+
+class replace_href:
+    """regexp substitution callable turning `<a href="URL"` into a link
+    to `prefix` followed by the quoted URL
+
+    :param prefix: url the quoted original url is appended to
+    :param custom_css: optional stylesheet added to the rewritten link as a
+                       `custom_css` query parameter
+    """
+    def __init__(self, prefix, custom_css=None):
+        self.prefix = prefix
+        self.custom_css = custom_css
+
+    def __call__(self, match):
+        """return the replacement text for `match`, whose group 1 is the
+        original href value
+        """
+        original_url = match.group(1)
+        url = self.prefix + urlquote(original_url, safe='')
+        if self.custom_css is not None:
+            # quote the stylesheet too: left raw, a `"`, `&` or `#` in it
+            # would break the href attribute or the query string
+            css = urlquote(self.custom_css, safe='')
+            if '?' in url:
+                url = '%s&custom_css=%s' % (url, css)
+            else:
+                url = '%s?custom_css=%s' % (url, css)
+        return '<a href="%s"' % url
+
+class absolutize_links:
+    """regexp substitution callable making the url captured in group 1
+    absolute, using `embedded_url` as base
+
+    :param embedded_url: base url relative links are joined with
+    :param tag: text emitted before `="..."`, e.g. '<a href'
+    :param custom_css: stored on the instance, not used by `__call__`
+    """
+    def __init__(self, embedded_url, tag, custom_css=None):
+        self.custom_css = custom_css
+        self.tag = tag
+        self.embedded_url = embedded_url
+
+    def __call__(self, match):
+        """return the replacement text for `match`"""
+        link = match.group(1)
+        if '://' not in link:
+            absolute = urljoin(self.embedded_url, link)
+            return '%s="%s"' % (self.tag, absolute)
+        # url already carries a scheme: give the matched text back as is
+        return match.group(0)
+
+
+def prefix_links(body, prefix, embedded_url, custom_css=None):
+    """return `body` after three substitution passes, in this order:
+
+    1. relative `<a href` urls are made absolute against `embedded_url`
+    2. relative `<img src` urls are made absolute against `embedded_url`
+    3. every `<a href` url is rewritten by `replace_href(prefix, custom_css)`
+    """
+    absolutize_href = absolutize_links(embedded_url, '<a href', custom_css)
+    absolutize_src = absolutize_links(embedded_url, '<img src')
+    rewrite_href = replace_href(prefix, custom_css)
+    body = HREF_RGX.sub(absolutize_href, body)
+    body = SRC_RGX.sub(absolutize_src, body)
+    return HREF_RGX.sub(rewrite_href, body)
+
+def embed_external_page(url, prefix, headers=None, custom_css=None):
+    """fetch the page at `url` and return its markup as a unicode string
+
+    When a <body> element is found, only its content is returned, with
+    links rewritten by `prefix_links`; otherwise the whole decoded page is
+    returned untouched.
+
+    :param url: address of the page to fetch
+    :param prefix: url prefix handed to `prefix_links`
+    :param headers: optional dictionary of HTTP headers sent with the request
+    :param custom_css: optional stylesheet handed to `prefix_links`
+
+    Errors raised by `urlopen` are not caught here.
+    """
+    response = urlopen(Request(url, headers=(headers or {})))
+    # release the connection even if reading fails
+    try:
+        content = response.read()
+    finally:
+        response.close()
+    # undecodable bytes are replaced rather than raising
+    page_source = unicode(content, guess_encoding(content), 'replace')
+    match = BODY_RGX.search(page_source)
+    if match is None:
+        # NOTE(review): links are not rewritten in this case -- confirm
+        # this is intended
+        return page_source
+    return prefix_links(match.group(1), prefix, url, custom_css)