web/views/embedding.py
changeset 0 b97547f5f1fa
child 237 3df2e0ae2eba
equal deleted inserted replaced
-1:000000000000 0:b97547f5f1fa
       
     1 """Objects interacting together to provides the external page embeding
       
     2 functionality.
       
     3 
       
     4 
       
     5 :organization: Logilab
       
     6 :copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
       
     7 :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
       
     8 """
       
     9 __docformat__ = "restructuredtext en"
       
    10 
       
    11 import re
       
    12 from urlparse import urljoin
       
    13 from urllib2 import urlopen, Request, HTTPError
       
    14 
       
    15 from logilab.mtconverter import guess_encoding
       
    16 
       
    17 from cubicweb import urlquote # XXX should use view.url_quote method
       
    18 from cubicweb.interfaces import IEmbedable
       
    19 from cubicweb.common.uilib import soup2xhtml
       
    20 from cubicweb.common.selectors import (onelinerset_selector, score_entity_selector,
       
    21                                     searchstate_selector, interface_selector)
       
    22 from cubicweb.common.view import NOINDEX, NOFOLLOW
       
    23 from cubicweb.web.controller import Controller
       
    24 from cubicweb.web.action import Action
       
    25 from cubicweb.web.views import basetemplates
       
    26 
       
    27 
       
    28 class ExternalTemplate(basetemplates.TheMainTemplate):
       
    29     """template embeding an external web pages into CubicWeb web interface
       
    30     """
       
    31     id = 'external'
       
    32     
       
    33     def call(self, body):
       
    34         # XXX fallback to HTML 4 mode when embeding ?
       
    35         self.set_request_content_type()
       
    36         self.process_rql(self.req.form.get('rql'))
       
    37         self.req.search_state = ('normal',)
       
    38         self.template_header(self.content_type, None, self.req._('external page'),
       
    39                              [NOINDEX, NOFOLLOW])
       
    40         self.content_header()
       
    41         self.w(body)
       
    42         self.content_footer()
       
    43         self.template_footer()
       
    44 
       
    45 
       
    46 class EmbedController(Controller):
       
    47     id = 'embed'
       
    48     template = 'external'
       
    49 
       
    50     def publish(self, rset=None):
       
    51         req = self.req
       
    52         if 'custom_css' in req.form:
       
    53             req.add_css(req.form['custom_css'])
       
    54         embedded_url = req.form['url']
       
    55         allowed = self.config['embed-allowed']
       
    56         _ = req._
       
    57         if allowed is None or not allowed.match(embedded_url):
       
    58             body = '<h2>%s</h2><h3>%s</h3>' % (
       
    59                 _('error while embedding page'),
       
    60                 _('embedding this url is forbidden'))
       
    61         else:
       
    62             prefix = req.build_url(self.id, url='')
       
    63             authorization = req.get_header('Authorization')
       
    64             if authorization:
       
    65                 headers = {'Authorization' : authorization}
       
    66             else:
       
    67                 headers = {}
       
    68             try:
       
    69                 body = embed_external_page(embedded_url, prefix,
       
    70                                            headers, req.form.get('custom_css'))
       
    71                 body = soup2xhtml(body, self.req.encoding)
       
    72             except HTTPError, err:
       
    73                 body = '<h2>%s</h2><h3>%s</h3>' % (
       
    74                     _('error while embedding page'), err)
       
    75         return self.vreg.main_template(req, self.template, body=body)
       
    76 
       
    77 
       
    78 class EmbedAction(Action):
       
    79     """display an 'embed' link on entity implementing `embeded_url` method
       
    80     if the returned url match embeding configuration
       
    81     """
       
    82     id = 'embed'
       
    83     controller = 'embed'
       
    84     __selectors__ = (onelinerset_selector, searchstate_selector,
       
    85                      interface_selector, score_entity_selector)
       
    86     accepts_interfaces = (IEmbedable,)
       
    87     
       
    88     title = _('embed')
       
    89         
       
    90     @classmethod
       
    91     def score_entity(cls, entity):
       
    92         """return a score telling how well I can display the given 
       
    93         entity instance (required by the value_selector)
       
    94         """
       
    95         url = entity.embeded_url()
       
    96         if not url or not url.strip():
       
    97             return 0
       
    98         allowed = cls.config['embed-allowed']
       
    99         if allowed is None or not allowed.match(url):
       
   100             return 0
       
   101         return 1
       
   102     
       
   103     def url(self, row=0):
       
   104         entity = self.rset.get_entity(row, 0)
       
   105         url = urljoin(self.req.base_url(), entity.embeded_url())
       
   106         if self.req.form.has_key('rql'):
       
   107             return self.build_url(url=url, rql=self.req.form['rql'])
       
   108         return self.build_url(url=url)
       
   109 
       
   110 
       
   111 
       
   112 # functions doing necessary substitutions to embed an external html page ######
       
   113 
       
   114 
       
   115 BODY_RGX = re.compile('<body.*?>(.*?)</body>', re.I | re.S | re.U)
       
   116 HREF_RGX = re.compile('<a\s+href="([^"]*)"', re.I | re.S | re.U)
       
   117 SRC_RGX = re.compile('<img\s+src="([^"]*)"', re.I | re.S | re.U)
       
   118 
       
   119 
       
   120 class replace_href:
       
   121     def __init__(self, prefix, custom_css=None):
       
   122         self.prefix = prefix
       
   123         self.custom_css = custom_css
       
   124         
       
   125     def __call__(self, match):
       
   126         original_url = match.group(1)
       
   127         url = self.prefix + urlquote(original_url, safe='')
       
   128         if self.custom_css is not None:
       
   129             if '?' in url:
       
   130                 url = '%s&amp;custom_css=%s' % (url, self.custom_css)
       
   131             else:
       
   132                 url = '%s?custom_css=%s' % (url, self.custom_css)
       
   133         return '<a href="%s"' % url
       
   134 
       
   135 class absolutize_links:
       
   136     def __init__(self, embedded_url, tag, custom_css=None):
       
   137         self.embedded_url = embedded_url
       
   138         self.tag = tag
       
   139         self.custom_css = custom_css
       
   140     
       
   141     def __call__(self, match):
       
   142         original_url = match.group(1)
       
   143         if '://' in original_url:
       
   144             return match.group(0) # leave it unchanged
       
   145         return '%s="%s"' % (self.tag, urljoin(self.embedded_url, original_url))
       
   146 
       
   147 
       
   148 def prefix_links(body, prefix, embedded_url, custom_css=None):
       
   149     filters = ((HREF_RGX, absolutize_links(embedded_url, '<a href', custom_css)),
       
   150                (SRC_RGX, absolutize_links(embedded_url, '<img src')),
       
   151                (HREF_RGX, replace_href(prefix, custom_css)))
       
   152     for rgx, repl in filters:
       
   153         body = rgx.sub(repl, body)
       
   154     return body
       
   155     
       
   156 def embed_external_page(url, prefix, headers=None, custom_css=None):
       
   157     req = Request(url, headers=(headers or {}))
       
   158     content = urlopen(req).read()
       
   159     page_source = unicode(content, guess_encoding(content), 'replace')
       
   160     page_source =page_source
       
   161     match = BODY_RGX.search(page_source)
       
   162     if match is None:
       
   163         return page_source
       
   164     return prefix_links(match.group(1), prefix, url, custom_css)