296 self.sourceuris = sourceuris |
296 self.sourceuris = sourceuris |
297 self.import_log = import_log |
297 self.import_log = import_log |
298 self.stats = {'created': set(), 'updated': set(), 'checked': set()} |
298 self.stats = {'created': set(), 'updated': set(), 'checked': set()} |
299 |
299 |
def normalize_url(self, url):
    """Return `url` rewritten according to `cubicweb.sobjects.URL_MAPPING`.

    This dictionary allows redirecting from one host to another, which may
    be useful for example in case of a test instance using production data,
    while you don't want to load the external source nor to hack your
    `/etc/hosts` file.

    The first key of the mapping that `url` starts with is replaced (once)
    by its associated value; when no key matches, `url` is returned
    unchanged.
    """
    # local import mandatory, it's available after registration
    from cubicweb.sobjects import URL_MAPPING
    for mappedurl in URL_MAPPING:
        if url.startswith(mappedurl):
            # count=1: only rewrite the leading prefix, never a later
            # occurrence of the same text inside the url
            return url.replace(mappedurl, URL_MAPPING[mappedurl], 1)
    return url
|
315 |
|
def retrieve_url(self, url):
    """Return a stream for the content designated by the given url:

    * HTTP urls are normalized (see :meth:`normalize_url`), logged and
      opened through the module-level opener, using the source's
      `http_timeout`
    * file:// urls are opened as local files
    * anything else is considered as plain content and wrapped in a
      `StringIO`, which is useful for testing purposes

    The caller is responsible for closing the returned stream.
    """
    if url.startswith('http'):
        url = self.normalize_url(url)
        self.source.info('GET %s', url)
        return _OPENER.open(url, timeout=self.source.http_timeout)
    if url.startswith('file://'):
        # strip the scheme prefix to get the local path
        return open(url[len('file://'):])
    return StringIO.StringIO(url)
306 |
329 |
def add_schema_config(self, schemacfg, checkonly=False):
    """added CWSourceSchemaConfig, modify mapping accordingly

    This implementation never modifies anything: it always raises a
    `ValidationError` attached to `schemacfg.eid`, with a translated
    message stating that this parser doesn't use a mapping. `checkonly`
    is accepted but not used here.
    """
    msg = schemacfg._cw._("this parser doesn't use a mapping")
    raise ValidationError(schemacfg.eid, {None: msg})
444 rollback() |
467 rollback() |
445 error = True |
468 error = True |
446 return error |
469 return error |
447 |
470 |
def parse(self, url):
    """Retrieve the content designated by `url`, parse it as XML and
    return the result of :meth:`parse_etree` called on the root element.

    The stream obtained from :meth:`retrieve_url` is closed once the
    document has been parsed, whether parsing succeeded or not.
    """
    stream = self.retrieve_url(url)
    try:
        document = etree.parse(stream).getroot()
    finally:
        # the document is fully parsed (or parsing failed) at this point,
        # so the underlying file / HTTP response can be released; tolerate
        # stream-like objects which have no close method
        close = getattr(stream, 'close', None)
        if close is not None:
            close()
    return self.parse_etree(document)
458 |
474 |
def parse_etree(self, document):
    """Return a list holding a single 1-tuple which wraps `document`.

    `document` is whatever the caller hands over (:meth:`parse` gives the
    root element of the parsed XML tree); it is returned as is.
    """
    return [(document,)]
461 |
477 |