author | Adrien Di Mascio <Adrien.DiMascio@logilab.fr> |
Mon, 10 Nov 2008 19:33:55 +0100 | |
changeset 16 | a70ece4d9d1a |
parent 0 | b97547f5f1fa |
child 237 | 3df2e0ae2eba |
permissions | -rw-r--r-- |
"""Objects interacting together to provides the external page embeding functionality. :organization: Logilab :copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved. :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr """ __docformat__ = "restructuredtext en" import re from urlparse import urljoin from urllib2 import urlopen, Request, HTTPError from logilab.mtconverter import guess_encoding from cubicweb import urlquote # XXX should use view.url_quote method from cubicweb.interfaces import IEmbedable from cubicweb.common.uilib import soup2xhtml from cubicweb.common.selectors import (onelinerset_selector, score_entity_selector, searchstate_selector, interface_selector) from cubicweb.common.view import NOINDEX, NOFOLLOW from cubicweb.web.controller import Controller from cubicweb.web.action import Action from cubicweb.web.views import basetemplates class ExternalTemplate(basetemplates.TheMainTemplate): """template embeding an external web pages into CubicWeb web interface """ id = 'external' def call(self, body): # XXX fallback to HTML 4 mode when embeding ? self.set_request_content_type() self.process_rql(self.req.form.get('rql')) self.req.search_state = ('normal',) self.template_header(self.content_type, None, self.req._('external page'), [NOINDEX, NOFOLLOW]) self.content_header() self.w(body) self.content_footer() self.template_footer() class EmbedController(Controller): id = 'embed' template = 'external' def publish(self, rset=None): req = self.req if 'custom_css' in req.form: req.add_css(req.form['custom_css']) embedded_url = req.form['url'] allowed = self.config['embed-allowed'] _ = req._ if allowed is None or not allowed.match(embedded_url): body = '<h2>%s</h2><h3>%s</h3>' % ( _('error while embedding page'), _('embedding this url is forbidden')) else: prefix = req.build_url(self.id, url='') authorization = req.get_header('Authorization') if authorization: headers = {'Authorization' : authorization} else: headers = {} try: body = embed_external_page(embedded_url, prefix, headers, req.form.get('custom_css')) body = soup2xhtml(body, self.req.encoding) except HTTPError, err: body = '<h2>%s</h2><h3>%s</h3>' % ( _('error while embedding page'), err) return self.vreg.main_template(req, self.template, body=body) class EmbedAction(Action): """display an 'embed' link on entity implementing `embeded_url` method if the returned url match embeding configuration """ id = 'embed' controller = 'embed' __selectors__ = (onelinerset_selector, searchstate_selector, interface_selector, score_entity_selector) accepts_interfaces = (IEmbedable,) title = _('embed') @classmethod def score_entity(cls, entity): """return a score telling how well I can display the given entity instance (required by the value_selector) """ url = entity.embeded_url() if not url or not url.strip(): return 0 allowed = cls.config['embed-allowed'] if allowed is None or not allowed.match(url): return 0 return 1 def url(self, row=0): entity = self.rset.get_entity(row, 0) url = urljoin(self.req.base_url(), entity.embeded_url()) if self.req.form.has_key('rql'): return self.build_url(url=url, rql=self.req.form['rql']) return self.build_url(url=url) # functions doing necessary substitutions to embed an external html page ###### BODY_RGX = re.compile('<body.*?>(.*?)</body>', re.I | re.S | re.U) HREF_RGX = re.compile('<a\s+href="([^"]*)"', re.I | re.S | re.U) SRC_RGX = re.compile('<img\s+src="([^"]*)"', re.I | re.S | re.U) class replace_href: def __init__(self, prefix, custom_css=None): self.prefix = prefix self.custom_css = custom_css def __call__(self, match): original_url = match.group(1) url = self.prefix + urlquote(original_url, safe='') if self.custom_css is not None: if '?' in url: url = '%s&custom_css=%s' % (url, self.custom_css) else: url = '%s?custom_css=%s' % (url, self.custom_css) return '<a href="%s"' % url class absolutize_links: def __init__(self, embedded_url, tag, custom_css=None): self.embedded_url = embedded_url self.tag = tag self.custom_css = custom_css def __call__(self, match): original_url = match.group(1) if '://' in original_url: return match.group(0) # leave it unchanged return '%s="%s"' % (self.tag, urljoin(self.embedded_url, original_url)) def prefix_links(body, prefix, embedded_url, custom_css=None): filters = ((HREF_RGX, absolutize_links(embedded_url, '<a href', custom_css)), (SRC_RGX, absolutize_links(embedded_url, '<img src')), (HREF_RGX, replace_href(prefix, custom_css))) for rgx, repl in filters: body = rgx.sub(repl, body) return body def embed_external_page(url, prefix, headers=None, custom_css=None): req = Request(url, headers=(headers or {})) content = urlopen(req).read() page_source = unicode(content, guess_encoding(content), 'replace') page_source =page_source match = BODY_RGX.search(page_source) if match is None: return page_source return prefix_links(match.group(1), prefix, url, custom_css)