server/sources/datafeed.py
changeset 9823 258d2f9f7d39
parent 9822 4a118bfd6ab4
child 9824 30183ecf5c61
equal deleted inserted replaced
9822:4a118bfd6ab4 9823:258d2f9f7d39
   296         self.sourceuris = sourceuris
   296         self.sourceuris = sourceuris
   297         self.import_log = import_log
   297         self.import_log = import_log
   298         self.stats = {'created': set(), 'updated': set(), 'checked': set()}
   298         self.stats = {'created': set(), 'updated': set(), 'checked': set()}
   299 
   299 
   300     def normalize_url(self, url):
   300     def normalize_url(self, url):
   301         from cubicweb.sobjects import URL_MAPPING # available after registration
   301         """Normalize a URL by checking whether a replacement for it exists in
       
   302         `cubicweb.sobjects.URL_MAPPING`.
       
   303 
       
   304         This dictionary lets you redirect from one host to another, which may be
       
   305         useful for example in case of test instance using production data, while
       
   306         you want neither to load the external source nor to hack your `/etc/hosts`
       
   307         file.
       
   308         """
       
   309         # local import mandatory, it's available after registration
       
   310         from cubicweb.sobjects import URL_MAPPING
   302         for mappedurl in URL_MAPPING:
   311         for mappedurl in URL_MAPPING:
   303             if url.startswith(mappedurl):
   312             if url.startswith(mappedurl):
   304                 return url.replace(mappedurl, URL_MAPPING[mappedurl], 1)
   313                 return url.replace(mappedurl, URL_MAPPING[mappedurl], 1)
   305         return url
   314         return url
       
   315 
       
   316     def retrieve_url(self, url):
       
   317         """Return a stream for the given url:
       
   318         * HTTP urls will be normalized (see :meth:`normalize_url`)
       
   319         * file:// URLs are opened as local files
       
   320         * anything else is treated as plain content, useful for testing purposes
       
   321         """
       
   322         if url.startswith('http'):
       
   323             url = self.normalize_url(url)
       
   324             self.source.info('GET %s', url)
       
   325             return _OPENER.open(url, timeout=self.source.http_timeout)
       
   326         if url.startswith('file://'):
       
   327             return open(url[7:])
       
   328         return StringIO.StringIO(url)
   306 
   329 
   307     def add_schema_config(self, schemacfg, checkonly=False):
   330     def add_schema_config(self, schemacfg, checkonly=False):
   308         """added CWSourceSchemaConfig, modify mapping accordingly"""
   331         """added CWSourceSchemaConfig, modify mapping accordingly"""
   309         msg = schemacfg._cw._("this parser doesn't use a mapping")
   332         msg = schemacfg._cw._("this parser doesn't use a mapping")
   310         raise ValidationError(schemacfg.eid, {None: msg})
   333         raise ValidationError(schemacfg.eid, {None: msg})
   444                 rollback()
   467                 rollback()
   445                 error = True
   468                 error = True
   446         return error
   469         return error
   447 
   470 
   448     def parse(self, url):
   471     def parse(self, url):
   449         if url.startswith('http'):
   472         stream = self.retrieve_url(url)
   450             url = self.normalize_url(url)
       
   451             self.source.info('GET %s', url)
       
   452             stream = _OPENER.open(url, timeout=self.source.http_timeout)
       
   453         elif url.startswith('file://'):
       
   454             stream = open(url[7:])
       
   455         else:
       
   456             stream = StringIO.StringIO(url)
       
   457         return self.parse_etree(etree.parse(stream).getroot())
   473         return self.parse_etree(etree.parse(stream).getroot())
   458 
   474 
   459     def parse_etree(self, document):
   475     def parse_etree(self, document):
   460         return [(document,)]
   476         return [(document,)]
   461 
   477