"""Objects interacting together to provides the external page embedingfunctionality.:organization: Logilab:copyright: 2001-2008 LOGILAB S.A. (Paris, FRANCE), all rights reserved.:contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr"""__docformat__="restructuredtext en"importrefromurlparseimporturljoinfromurllib2importurlopen,Request,HTTPErrorfromlogilab.mtconverterimportguess_encodingfromcubicwebimporturlquote# XXX should use view.url_quote methodfromcubicweb.interfacesimportIEmbedablefromcubicweb.common.uilibimportsoup2xhtmlfromcubicweb.common.selectorsimport(one_line_rset,score_entity_selector,match_search_state,implement_interface)fromcubicweb.common.viewimportNOINDEX,NOFOLLOWfromcubicweb.web.controllerimportControllerfromcubicweb.web.actionimportActionfromcubicweb.web.viewsimportbasetemplatesclassExternalTemplate(basetemplates.TheMainTemplate):"""template embeding an external web pages into CubicWeb web interface """id='external'defcall(self,body):# XXX fallback to HTML 4 mode when embeding ?self.set_request_content_type()self.process_rql(self.req.form.get('rql'))self.req.search_state=('normal',)self.template_header(self.content_type,None,self.req._('external page'),[NOINDEX,NOFOLLOW])self.content_header()self.w(body)self.content_footer()self.template_footer()classEmbedController(Controller):id='embed'template='external'defpublish(self,rset=None):req=self.reqif'custom_css'inreq.form:req.add_css(req.form['custom_css'])embedded_url=req.form['url']allowed=self.config['embed-allowed']_=req._ifallowedisNoneornotallowed.match(embedded_url):body='<h2>%s</h2><h3>%s</h3>'%(_('error while embedding page'),_('embedding this url is forbidden'))else:prefix=req.build_url(self.id,url='')authorization=req.get_header('Authorization')ifauthorization:headers={'Authorization':authorization}else:headers={}try:body=embed_external_page(embedded_url,prefix,headers,req.form.get('custom_css'))body=soup2xhtml(body,self.req.encoding)exceptHTTPError,err:body='<h2>%s</h2><h3>%s</h3>'%(_('error while embedding page'),err)returnself.vreg.main_template(req,self.template,body=body)classEmbedAction(Action):"""display an 'embed' link on entity implementing `embeded_url` method if the returned url match embeding configuration """id='embed'controller='embed'__selectors__=(one_line_rset,match_search_state,implement_interface,score_entity_selector)accepts_interfaces=(IEmbedable,)title=_('embed')@classmethoddefscore_entity(cls,entity):"""return a score telling how well I can display the given entity instance (required by the value_selector) """url=entity.embeded_url()ifnoturlornoturl.strip():return0allowed=cls.config['embed-allowed']ifallowedisNoneornotallowed.match(url):return0return1defurl(self,row=0):entity=self.rset.get_entity(row,0)url=urljoin(self.req.base_url(),entity.embeded_url())ifself.req.form.has_key('rql'):returnself.build_url(url=url,rql=self.req.form['rql'])returnself.build_url(url=url)# functions doing necessary substitutions to embed an external html page ######BODY_RGX=re.compile('<body.*?>(.*?)</body>',re.I|re.S|re.U)HREF_RGX=re.compile('<a\s+href="([^"]*)"',re.I|re.S|re.U)SRC_RGX=re.compile('<img\s+src="([^"]*)"',re.I|re.S|re.U)classreplace_href:def__init__(self,prefix,custom_css=None):self.prefix=prefixself.custom_css=custom_cssdef__call__(self,match):original_url=match.group(1)url=self.prefix+urlquote(original_url,safe='')ifself.custom_cssisnotNone:if'?'inurl:url='%s&custom_css=%s'%(url,self.custom_css)else:url='%s?custom_css=%s'%(url,self.custom_css)return'<a href="%s"'%urlclassabsolutize_links:def__init__(self,embedded_url,tag,custom_css=None):self.embedded_url=embedded_urlself.tag=tagself.custom_css=custom_cssdef__call__(self,match):original_url=match.group(1)if'://'inoriginal_url:returnmatch.group(0)# leave it unchangedreturn'%s="%s"'%(self.tag,urljoin(self.embedded_url,original_url))defprefix_links(body,prefix,embedded_url,custom_css=None):filters=((HREF_RGX,absolutize_links(embedded_url,'<a href',custom_css)),(SRC_RGX,absolutize_links(embedded_url,'<img src')),(HREF_RGX,replace_href(prefix,custom_css)))forrgx,replinfilters:body=rgx.sub(repl,body)returnbodydefembed_external_page(url,prefix,headers=None,custom_css=None):req=Request(url,headers=(headersor{}))content=urlopen(req).read()page_source=unicode(content,guess_encoding(content),'replace')page_source=page_sourcematch=BODY_RGX.search(page_source)ifmatchisNone:returnpage_sourcereturnprefix_links(match.group(1),prefix,url,custom_css)