web/views/embedding.py
changeset 0 b97547f5f1fa
child 237 3df2e0ae2eba
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/views/embedding.py	Wed Nov 05 15:52:50 2008 +0100
@@ -0,0 +1,164 @@
+"""Objects interacting together to provide the external page embedding
+functionality.
+
+
+:organization: Logilab
+:copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
+:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
+"""
+__docformat__ = "restructuredtext en"
+
+import re
+from urlparse import urljoin
+from urllib2 import urlopen, Request, HTTPError
+
+from logilab.mtconverter import guess_encoding
+
+from cubicweb import urlquote # XXX should use view.url_quote method
+from cubicweb.interfaces import IEmbedable
+from cubicweb.common.uilib import soup2xhtml
+from cubicweb.common.selectors import (onelinerset_selector, score_entity_selector,
+                                    searchstate_selector, interface_selector)
+from cubicweb.common.view import NOINDEX, NOFOLLOW
+from cubicweb.web.controller import Controller
+from cubicweb.web.action import Action
+from cubicweb.web.views import basetemplates
+
+
+class ExternalTemplate(basetemplates.TheMainTemplate):
+    """Template embedding an external web page into the CubicWeb web
+    interface.
+
+    Its id, 'external', is the value `EmbedController.template` refers to.
+    """
+    id = 'external'
+    
+    def call(self, body):
+        """Write a complete page whose content area is `body`.
+
+        :param body: markup of the embedded page, written unchanged through
+          `self.w` between the content header and the content footer
+        """
+        # XXX fallback to HTML 4 mode when embedding ?
+        self.set_request_content_type()
+        self.process_rql(self.req.form.get('rql'))
+        # NOTE(review): presumably forces the regular (non search) display
+        # mode for the embedded page -- confirm
+        self.req.search_state = ('normal',)
+        # the third argument is the translated 'external page' message, the
+        # last one carries the NOINDEX / NOFOLLOW values
+        self.template_header(self.content_type, None, self.req._('external page'),
+                             [NOINDEX, NOFOLLOW])
+        self.content_header()
+        self.w(body)
+        self.content_footer()
+        self.template_footer()
+
+
+class EmbedController(Controller):
+    """Controller publishing an external page through the 'external' template.
+
+    The page address is read from the `url` form parameter; it is only
+    fetched when it matches the 'embed-allowed' configuration value.
+    """
+    id = 'embed'
+    template = 'external'
+
+    def publish(self, rset=None):
+        """Return the main template rendering for the requested url.
+
+        The body handed to the template is either the fetched page, once
+        passed through `soup2xhtml`, or an error message (url not allowed,
+        or HTTP error raised while fetching).
+        """
+        req = self.req
+        form = req.form
+        if 'custom_css' in form:
+            req.add_css(form['custom_css'])
+        embedded_url = form['url']
+        allowed = self.config['embed-allowed']
+        _ = req._
+        error_markup = '<h2>%s</h2><h3>%s</h3>'
+        if allowed is not None and allowed.match(embedded_url):
+            headers = {}
+            prefix = req.build_url(self.id, url='')
+            authorization = req.get_header('Authorization')
+            if authorization:
+                # forward the Authorization header of the current request
+                headers['Authorization'] = authorization
+            try:
+                page = embed_external_page(embedded_url, prefix,
+                                           headers, form.get('custom_css'))
+                body = soup2xhtml(page, req.encoding)
+            except HTTPError, err:
+                body = error_markup % (_('error while embedding page'), err)
+        else:
+            # no 'embed-allowed' value, or the url does not match it
+            body = error_markup % (_('error while embedding page'),
+                                   _('embedding this url is forbidden'))
+        return self.vreg.main_template(req, self.template, body=body)
+
+
+class EmbedAction(Action):
+    """Display an 'embed' link on entities implementing the `embeded_url`
+    method, if the returned url matches the embedding configuration.
+    """
+    id = 'embed'
+    controller = 'embed'
+    __selectors__ = (onelinerset_selector, searchstate_selector,
+                     interface_selector, score_entity_selector)
+    accepts_interfaces = (IEmbedable,)
+
+    title = _('embed')
+
+    @classmethod
+    def score_entity(cls, entity):
+        """Return 1 when the entity's url may be embedded, 0 otherwise.
+
+        The score is 0 when `entity.embeded_url()` is empty or blank, when
+        there is no 'embed-allowed' configuration value, or when the url
+        does not match it. Presumably called by `score_entity_selector`,
+        listed in `__selectors__`.
+        """
+        url = entity.embeded_url()
+        if not url or not url.strip():
+            return 0
+        allowed = cls.config['embed-allowed']
+        if allowed is None or not allowed.match(url):
+            return 0
+        return 1
+
+    def url(self, row=0):
+        """Return the url of the embed link for the entity at `row`.
+
+        The entity's url is joined to the request base url, so a relative
+        one is made absolute. The current `rql` form parameter, if any, is
+        propagated to the built url.
+        """
+        entity = self.rset.get_entity(row, 0)
+        url = urljoin(self.req.base_url(), entity.embeded_url())
+        form = self.req.form
+        # membership test rather than the deprecated dict.has_key()
+        if 'rql' in form:
+            return self.build_url(url=url, rql=form['rql'])
+        return self.build_url(url=url)
+
+
+
+# functions doing necessary substitutions to embed an external html page ######
+
+
+# patterns are raw strings so that `\s` reaches the regex engine untouched
+# content found between the <body ...> and </body> tags of a page
+BODY_RGX = re.compile(r'<body.*?>(.*?)</body>', re.I | re.S | re.U)
+# url of <a> tags whose first attribute is a double-quoted href
+HREF_RGX = re.compile(r'<a\s+href="([^"]*)"', re.I | re.S | re.U)
+# url of <img> tags whose first attribute is a double-quoted src
+SRC_RGX = re.compile(r'<img\s+src="([^"]*)"', re.I | re.S | re.U)
+
+
+class replace_href:
+    """Substitution callable redirecting `<a href="...">` links to a prefix.
+
+    Instances are meant to be given to the `sub` method of a regular
+    expression whose first group is the link target (see HREF_RGX).
+
+    :param prefix: url the quoted link target is appended to
+    :param custom_css: optional stylesheet reference, propagated to the
+      rewritten link as a `custom_css` parameter
+    """
+    def __init__(self, prefix, custom_css=None):
+        self.prefix = prefix
+        self.custom_css = custom_css
+
+    def __call__(self, match):
+        """Return the `<a href="...` text replacing the matched link."""
+        original_url = match.group(1)
+        url = self.prefix + urlquote(original_url, safe='')
+        if self.custom_css is not None:
+            # quote the value as well: it ends up in a query string inside an
+            # html attribute, where a raw `&`, `#` or `"` would corrupt the
+            # link (or close the attribute)
+            css = urlquote(self.custom_css, safe='')
+            if '?' in url:
+                url = '%s&amp;custom_css=%s' % (url, css)
+            else:
+                url = '%s?custom_css=%s' % (url, css)
+        return '<a href="%s"' % url
+
+class absolutize_links:
+    """Substitution callable turning relative link targets into absolute
+    ones.
+
+    :param embedded_url: url of the embedded page, relative targets are
+      resolved against it
+    :param tag: text emitted before the `="..."` part of a rewritten link,
+      e.g. '<a href' or '<img src'
+    :param custom_css: stored on the instance, not used by the substitution
+    """
+    def __init__(self, embedded_url, tag, custom_css=None):
+        self.custom_css = custom_css
+        self.tag = tag
+        self.embedded_url = embedded_url
+
+    def __call__(self, match):
+        """Return the text replacing the matched tag start."""
+        target = match.group(1)
+        if '://' not in target:
+            absolute = urljoin(self.embedded_url, target)
+            return '%s="%s"' % (self.tag, absolute)
+        # target already carries a scheme: keep the matched text untouched
+        return match.group(0)
+
+
+def prefix_links(body, prefix, embedded_url, custom_css=None):
+    """Return `body` with its links rewritten for embedding.
+
+    Three substitutions are applied in sequence: relative `<a href>` targets
+    are made absolute, relative `<img src>` targets are made absolute, then
+    every `<a href>` target is redirected to `prefix`.
+    """
+    # order matters: anchors must be absolute before being prefixed
+    substitutions = [
+        (HREF_RGX, absolutize_links(embedded_url, '<a href', custom_css)),
+        (SRC_RGX, absolutize_links(embedded_url, '<img src')),
+        (HREF_RGX, replace_href(prefix, custom_css)),
+    ]
+    rewritten = body
+    for pattern, substitute in substitutions:
+        rewritten = pattern.sub(substitute, rewritten)
+    return rewritten
+    
+def embed_external_page(url, prefix, headers=None, custom_css=None):
+    """Fetch the page at `url` and return its markup prepared for embedding.
+
+    :param url: address of the page to fetch
+    :param prefix: url the links of the page are redirected to
+    :param headers: optional dictionary of HTTP headers sent with the request
+    :param custom_css: optional stylesheet reference propagated to the
+      rewritten links
+    :return: the content of the page's <body> with links rewritten by
+      `prefix_links`, or the whole decoded page, unchanged, when no <body>
+      element is found
+
+    Errors raised by `urlopen` are not caught here.
+    """
+    req = Request(url, headers=(headers or {}))
+    response = urlopen(req)
+    try:
+        content = response.read()
+    finally:
+        # release the connection even when reading fails
+        response.close()
+    # undecodable bytes are replaced rather than raising
+    page_source = unicode(content, guess_encoding(content), 'replace')
+    match = BODY_RGX.search(page_source)
+    if match is None:
+        return page_source
+    return prefix_links(match.group(1), prefix, url, custom_css)