web/views/embedding.py
author Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
Thu, 13 Nov 2008 11:45:57 +0100
changeset 60 dc90556488d8
parent 0 b97547f5f1fa
child 237 3df2e0ae2eba
permissions -rw-r--r--
oops, bad changeset review, revert debug changes
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
     1
"""Objects interacting together to provides the external page embeding
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
     2
functionality.
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
     3
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
     4
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
     5
:organization: Logilab
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
     6
:copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
     7
:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
     8
"""
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
     9
__docformat__ = "restructuredtext en"
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    10
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    11
import re
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    12
from urlparse import urljoin
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    13
from urllib2 import urlopen, Request, HTTPError
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    14
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    15
from logilab.mtconverter import guess_encoding
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    16
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    17
from cubicweb import urlquote # XXX should use view.url_quote method
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    18
from cubicweb.interfaces import IEmbedable
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    19
from cubicweb.common.uilib import soup2xhtml
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    20
from cubicweb.common.selectors import (onelinerset_selector, score_entity_selector,
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    21
                                    searchstate_selector, interface_selector)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    22
from cubicweb.common.view import NOINDEX, NOFOLLOW
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    23
from cubicweb.web.controller import Controller
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    24
from cubicweb.web.action import Action
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    25
from cubicweb.web.views import basetemplates
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    26
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    27
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    28
class ExternalTemplate(basetemplates.TheMainTemplate):
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    29
    """template embeding an external web pages into CubicWeb web interface
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    30
    """
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    31
    id = 'external'
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    32
    
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    33
    def call(self, body):
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    34
        # XXX fallback to HTML 4 mode when embeding ?
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    35
        self.set_request_content_type()
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    36
        self.process_rql(self.req.form.get('rql'))
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    37
        self.req.search_state = ('normal',)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    38
        self.template_header(self.content_type, None, self.req._('external page'),
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    39
                             [NOINDEX, NOFOLLOW])
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    40
        self.content_header()
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    41
        self.w(body)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    42
        self.content_footer()
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    43
        self.template_footer()
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    44
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    45
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    46
class EmbedController(Controller):
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    47
    id = 'embed'
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    48
    template = 'external'
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    49
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    50
    def publish(self, rset=None):
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    51
        req = self.req
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    52
        if 'custom_css' in req.form:
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    53
            req.add_css(req.form['custom_css'])
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    54
        embedded_url = req.form['url']
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    55
        allowed = self.config['embed-allowed']
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    56
        _ = req._
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    57
        if allowed is None or not allowed.match(embedded_url):
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    58
            body = '<h2>%s</h2><h3>%s</h3>' % (
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    59
                _('error while embedding page'),
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    60
                _('embedding this url is forbidden'))
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    61
        else:
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    62
            prefix = req.build_url(self.id, url='')
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    63
            authorization = req.get_header('Authorization')
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    64
            if authorization:
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    65
                headers = {'Authorization' : authorization}
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    66
            else:
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    67
                headers = {}
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    68
            try:
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    69
                body = embed_external_page(embedded_url, prefix,
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    70
                                           headers, req.form.get('custom_css'))
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    71
                body = soup2xhtml(body, self.req.encoding)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    72
            except HTTPError, err:
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    73
                body = '<h2>%s</h2><h3>%s</h3>' % (
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    74
                    _('error while embedding page'), err)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    75
        return self.vreg.main_template(req, self.template, body=body)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    76
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    77
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    78
class EmbedAction(Action):
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    79
    """display an 'embed' link on entity implementing `embeded_url` method
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    80
    if the returned url match embeding configuration
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    81
    """
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    82
    id = 'embed'
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    83
    controller = 'embed'
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    84
    __selectors__ = (onelinerset_selector, searchstate_selector,
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    85
                     interface_selector, score_entity_selector)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    86
    accepts_interfaces = (IEmbedable,)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    87
    
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    88
    title = _('embed')
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    89
        
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    90
    @classmethod
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    91
    def score_entity(cls, entity):
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    92
        """return a score telling how well I can display the given 
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    93
        entity instance (required by the value_selector)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    94
        """
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    95
        url = entity.embeded_url()
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    96
        if not url or not url.strip():
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    97
            return 0
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    98
        allowed = cls.config['embed-allowed']
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
    99
        if allowed is None or not allowed.match(url):
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   100
            return 0
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   101
        return 1
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   102
    
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   103
    def url(self, row=0):
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   104
        entity = self.rset.get_entity(row, 0)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   105
        url = urljoin(self.req.base_url(), entity.embeded_url())
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   106
        if self.req.form.has_key('rql'):
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   107
            return self.build_url(url=url, rql=self.req.form['rql'])
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   108
        return self.build_url(url=url)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   109
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   110
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   111
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   112
# functions doing necessary substitutions to embed an external html page ######
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   113
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   114
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   115
BODY_RGX = re.compile('<body.*?>(.*?)</body>', re.I | re.S | re.U)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   116
HREF_RGX = re.compile('<a\s+href="([^"]*)"', re.I | re.S | re.U)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   117
SRC_RGX = re.compile('<img\s+src="([^"]*)"', re.I | re.S | re.U)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   118
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   119
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   120
class replace_href:
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   121
    def __init__(self, prefix, custom_css=None):
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   122
        self.prefix = prefix
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   123
        self.custom_css = custom_css
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   124
        
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   125
    def __call__(self, match):
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   126
        original_url = match.group(1)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   127
        url = self.prefix + urlquote(original_url, safe='')
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   128
        if self.custom_css is not None:
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   129
            if '?' in url:
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   130
                url = '%s&amp;custom_css=%s' % (url, self.custom_css)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   131
            else:
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   132
                url = '%s?custom_css=%s' % (url, self.custom_css)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   133
        return '<a href="%s"' % url
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   134
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   135
class absolutize_links:
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   136
    def __init__(self, embedded_url, tag, custom_css=None):
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   137
        self.embedded_url = embedded_url
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   138
        self.tag = tag
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   139
        self.custom_css = custom_css
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   140
    
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   141
    def __call__(self, match):
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   142
        original_url = match.group(1)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   143
        if '://' in original_url:
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   144
            return match.group(0) # leave it unchanged
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   145
        return '%s="%s"' % (self.tag, urljoin(self.embedded_url, original_url))
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   146
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   147
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   148
def prefix_links(body, prefix, embedded_url, custom_css=None):
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   149
    filters = ((HREF_RGX, absolutize_links(embedded_url, '<a href', custom_css)),
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   150
               (SRC_RGX, absolutize_links(embedded_url, '<img src')),
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   151
               (HREF_RGX, replace_href(prefix, custom_css)))
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   152
    for rgx, repl in filters:
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   153
        body = rgx.sub(repl, body)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   154
    return body
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   155
    
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   156
def embed_external_page(url, prefix, headers=None, custom_css=None):
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   157
    req = Request(url, headers=(headers or {}))
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   158
    content = urlopen(req).read()
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   159
    page_source = unicode(content, guess_encoding(content), 'replace')
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   160
    page_source =page_source
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   161
    match = BODY_RGX.search(page_source)
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   162
    if match is None:
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   163
        return page_source
b97547f5f1fa Showtime !
Adrien Di Mascio <Adrien.DiMascio@logilab.fr>
parents:
diff changeset
   164
    return prefix_links(match.group(1), prefix, url, custom_css)