web/views/embedding.py
changeset 8803 47dd517d6c6f
parent 8697 574bb05e40a4
equal deleted inserted replaced
8802:d92919c995cc 8803:47dd517d6c6f
     1 # copyright 2003-2010 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
     1 # copyright 2003-2013 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
     2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
     2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
     3 #
     3 #
     4 # This file is part of CubicWeb.
     4 # This file is part of CubicWeb.
     5 #
     5 #
     6 # CubicWeb is free software: you can redistribute it and/or modify it under the
     6 # CubicWeb is free software: you can redistribute it and/or modify it under the
    17 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
    17 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
     18 """Objects interacting together to provide the external page embedding
     18 """Objects interacting together to provide the external page embedding
    19 functionality.
    19 functionality.
    20 """
    20 """
    21 
    21 
    22 __docformat__ = "restructuredtext en"
    22 from logilab.common.deprecation import class_moved, moved
    23 _ = unicode
       
    24 
    23 
    25 import re
    24 try:
    26 from urlparse import urljoin
    25     from cubes.embed.views import *
    27 from urllib2 import urlopen, Request, HTTPError
       
    28 from urllib import quote as urlquote # XXX should use view.url_quote method
       
    29 
    26 
    30 from logilab.mtconverter import guess_encoding
    27     IEmbedableAdapter = class_moved(IEmbedableAdapter, message='[3.17] IEmbedableAdapter moved to cubes.embed.views')
    31 
    # the deprecation message must name the class that actually moved
    ExternalTemplate = class_moved(ExternalTemplate, message='[3.17] ExternalTemplate moved to cubes.embed.views')
    32 from cubicweb.predicates import (one_line_rset, score_entity, implements,
    # the deprecation message must name the class that actually moved
    EmbedController = class_moved(EmbedController, message='[3.17] EmbedController moved to cubes.embed.views')
    33                                 adaptable, match_search_state)
    30     entity_has_embedable_url = moved('cubes.embed.views', 'entity_has_embedable_url')
    34 from cubicweb.interfaces import IEmbedable
    31     EmbedAction = class_moved(EmbedAction, message='[3.17] EmbedAction moved to cubes.embed.views')
    35 from cubicweb.view import NOINDEX, NOFOLLOW, EntityAdapter, implements_adapter_compat
    32     replace_href = class_moved(replace_href, message='[3.17] replace_href moved to cubes.embed.views')
    36 from cubicweb.uilib import soup2xhtml
    33     embed_external_page = moved('cubes.embed.views', 'embed_external_page')
    37 from cubicweb.web.controller import Controller
    34     absolutize_links = class_moved(absolutize_links, message='[3.17] absolutize_links moved to cubes.embed.views')
    38 from cubicweb.web.action import Action
    35     prefix_links = moved('cubes.embed.views', 'prefix_links')
    39 from cubicweb.web.views import basetemplates
    36 except ImportError:
    40 
    37     from cubicweb.web import LOGGER
    41 
    38     LOGGER.warning('[3.17] embedding extracted to cube embed that was not found. try installing it.')
    42 class IEmbedableAdapter(EntityAdapter):
       
    43     """interface for embedable entities"""
       
    44     __needs_bw_compat__ = True
       
    45     __regid__ = 'IEmbedable'
       
    46     __select__ = implements(IEmbedable, warn=False) # XXX for bw compat, should be abstract
       
    47 
       
    48     @implements_adapter_compat('IEmbedable')
       
    49     def embeded_url(self):
       
    50         """embed action interface"""
       
    51         raise NotImplementedError
       
    52 
       
    53 
       
    54 class ExternalTemplate(basetemplates.TheMainTemplate):
       
    55     """template embeding an external web pages into CubicWeb web interface
       
    56     """
       
    57     __regid__ = 'external'
       
    58 
       
    59     def call(self, body):
       
    60         # XXX fallback to HTML 4 mode when embeding ?
       
    61         self.set_request_content_type()
       
    62         self._cw.search_state = ('normal',)
       
    63         self.template_header(self.content_type, None, self._cw._('external page'),
       
    64                              [NOINDEX, NOFOLLOW])
       
    65         self.content_header()
       
    66         self.w(body)
       
    67         self.content_footer()
       
    68         self.template_footer()
       
    69 
       
    70 
       
    71 class EmbedController(Controller):
       
    72     __regid__ = 'embed'
       
    73     template = 'external'
       
    74 
       
    75     def publish(self, rset=None):
       
    76         req = self._cw
       
    77         if 'custom_css' in req.form:
       
    78             req.add_css(req.form['custom_css'])
       
    79         embedded_url = req.form['url']
       
    80         allowed = self._cw.vreg.config['embed-allowed']
       
    81         _ = req._
       
    82         if allowed is None or not allowed.match(embedded_url):
       
    83             body = '<h2>%s</h2><h3>%s</h3>' % (
       
    84                 _('error while embedding page'),
       
    85                 _('embedding this url is forbidden'))
       
    86         else:
       
    87             prefix = req.build_url(self.__regid__, url='')
       
    88             authorization = req.get_header('Authorization')
       
    89             if authorization:
       
    90                 headers = {'Authorization' : authorization}
       
    91             else:
       
    92                 headers = {}
       
    93             try:
       
    94                 body = embed_external_page(embedded_url, prefix,
       
    95                                            headers, req.form.get('custom_css'))
       
    96                 body = soup2xhtml(body, self._cw.encoding)
       
    97             except HTTPError as err:
       
    98                 body = '<h2>%s</h2><h3>%s</h3>' % (
       
    99                     _('error while embedding page'), err)
       
   100         rset = self.process_rql()
       
   101         return self._cw.vreg['views'].main_template(req, self.template,
       
   102                                                     rset=rset, body=body)
       
   103 
       
   104 
       
   105 def entity_has_embedable_url(entity):
       
   106     """return 1 if the entity provides an allowed embedable url"""
       
   107     url = entity.cw_adapt_to('IEmbedable').embeded_url()
       
   108     if not url or not url.strip():
       
   109         return 0
       
   110     allowed = entity._cw.vreg.config['embed-allowed']
       
   111     if allowed is None or not allowed.match(url):
       
   112         return 0
       
   113     return 1
       
   114 
       
   115 
       
   116 class EmbedAction(Action):
       
   117     """display an 'embed' link on entity implementing `embeded_url` method
       
   118     if the returned url match embeding configuration
       
   119     """
       
   120     __regid__ = 'embed'
       
   121     __select__ = (one_line_rset() & match_search_state('normal')
       
   122                   & adaptable('IEmbedable')
       
   123                   & score_entity(entity_has_embedable_url))
       
   124 
       
   125     title = _('embed')
       
   126 
       
   127     def url(self, row=0):
       
   128         entity = self.cw_rset.get_entity(row, 0)
       
   129         url = urljoin(self._cw.base_url(), entity.cw_adapt_to('IEmbedable').embeded_url())
       
   130         if 'rql' in self._cw.form:
       
   131             return self._cw.build_url('embed', url=url, rql=self._cw.form['rql'])
       
   132         return self._cw.build_url('embed', url=url)
       
   133 
       
   134 
       
   135 
       
   136 # functions doing necessary substitutions to embed an external html page ######
       
   137 
       
   138 
       
   139 BODY_RGX = re.compile('<body.*?>(.*?)</body>', re.I | re.S | re.U)
       
   140 HREF_RGX = re.compile('<a\s+href="([^"]*)"', re.I | re.S | re.U)
       
   141 SRC_RGX = re.compile('<img\s+src="([^"]*)"', re.I | re.S | re.U)
       
   142 
       
   143 
       
   144 class replace_href:
       
   145     def __init__(self, prefix, custom_css=None):
       
   146         self.prefix = prefix
       
   147         self.custom_css = custom_css
       
   148 
       
   149     def __call__(self, match):
       
   150         original_url = match.group(1)
       
   151         url = self.prefix + urlquote(original_url, safe='')
       
   152         if self.custom_css is not None:
       
   153             if '?' in url:
       
   154                 url = '%s&amp;custom_css=%s' % (url, self.custom_css)
       
   155             else:
       
   156                 url = '%s?custom_css=%s' % (url, self.custom_css)
       
   157         return '<a href="%s"' % url
       
   158 
       
   159 
       
   160 class absolutize_links:
       
   161     def __init__(self, embedded_url, tag, custom_css=None):
       
   162         self.embedded_url = embedded_url
       
   163         self.tag = tag
       
   164         self.custom_css = custom_css
       
   165 
       
   166     def __call__(self, match):
       
   167         original_url = match.group(1)
       
   168         if '://' in original_url:
       
   169             return match.group(0) # leave it unchanged
       
   170         return '%s="%s"' % (self.tag, urljoin(self.embedded_url, original_url))
       
   171 
       
   172 
       
   173 def prefix_links(body, prefix, embedded_url, custom_css=None):
       
   174     filters = ((HREF_RGX, absolutize_links(embedded_url, '<a href', custom_css)),
       
   175                (SRC_RGX, absolutize_links(embedded_url, '<img src')),
       
   176                (HREF_RGX, replace_href(prefix, custom_css)))
       
   177     for rgx, repl in filters:
       
   178         body = rgx.sub(repl, body)
       
   179     return body
       
   180 
       
   181 
       
   182 def embed_external_page(url, prefix, headers=None, custom_css=None):
       
   183     req = Request(url, headers=(headers or {}))
       
   184     content = urlopen(req).read()
       
   185     page_source = unicode(content, guess_encoding(content), 'replace')
       
   186     page_source = page_source
       
   187     match = BODY_RGX.search(page_source)
       
   188     if match is None:
       
   189         return page_source
       
   190     return prefix_links(match.group(1), prefix, url, custom_css)