equal
deleted
inserted
replaced
76 {'type': 'time', |
76 {'type': 'time', |
77 'default': '10d', |
77 'default': '10d', |
78 'help': ('Time before logs from datafeed imports are deleted.'), |
78 'help': ('Time before logs from datafeed imports are deleted.'), |
79 'group': 'datafeed-source', 'level': 2, |
79 'group': 'datafeed-source', 'level': 2, |
80 }), |
80 }), |
|
81 ('http-timeout', |
|
82 {'type': 'time', |
|
83 'default': '1min', |
|
84 'help': ('Timeout of HTTP GET requests, when synchronizing a source.'), |
|
85 'group': 'datafeed-source', 'level': 2, |
|
86 }), |
81 ) |
87 ) |
82 |
88 |
83 def check_config(self, source_entity): |
89 def check_config(self, source_entity): |
84 """check configuration of source entity""" |
90 """check configuration of source entity""" |
85 typed_config = super(DataFeedSource, self).check_config(source_entity) |
91 typed_config = super(DataFeedSource, self).check_config(source_entity) |
99 properly typed with defaults set |
105 properly typed with defaults set |
100 """ |
106 """ |
101 super(DataFeedSource, self).update_config(source_entity, typed_config) |
107 super(DataFeedSource, self).update_config(source_entity, typed_config) |
102 self.synchro_interval = timedelta(seconds=typed_config['synchronization-interval']) |
108 self.synchro_interval = timedelta(seconds=typed_config['synchronization-interval']) |
103 self.max_lock_lifetime = timedelta(seconds=typed_config['max-lock-lifetime']) |
109 self.max_lock_lifetime = timedelta(seconds=typed_config['max-lock-lifetime']) |
|
110 self.http_timeout = typed_config['http-timeout'] |
104 |
111 |
105 def init(self, activated, source_entity): |
112 def init(self, activated, source_entity): |
106 super(DataFeedSource, self).init(activated, source_entity) |
113 super(DataFeedSource, self).init(activated, source_entity) |
107 self.parser_id = source_entity.parser |
114 self.parser_id = source_entity.parser |
108 self.load_mapping(source_entity._cw) |
115 self.load_mapping(source_entity._cw) |
436 |
443 |
437 def parse(self, url): |
444 def parse(self, url): |
438 if url.startswith('http'): |
445 if url.startswith('http'): |
439 url = self.normalize_url(url) |
446 url = self.normalize_url(url) |
440 self.source.info('GET %s', url) |
447 self.source.info('GET %s', url) |
441 stream = _OPENER.open(url) |
448 stream = _OPENER.open(url, timeout=self.http_timeout) |
442 elif url.startswith('file://'): |
449 elif url.startswith('file://'): |
443 stream = open(url[7:]) |
450 stream = open(url[7:]) |
444 else: |
451 else: |
445 stream = StringIO.StringIO(url) |
452 stream = StringIO.StringIO(url) |
446 return self.parse_etree(etree.parse(stream).getroot()) |
453 return self.parse_etree(etree.parse(stream).getroot()) |
452 raise NotImplementedError |
459 raise NotImplementedError |
453 |
460 |
454 def is_deleted(self, extid, etype, eid): |
461 def is_deleted(self, extid, etype, eid): |
455 if extid.startswith('http'): |
462 if extid.startswith('http'): |
456 try: |
463 try: |
457 _OPENER.open(self.normalize_url(extid)) # XXX HTTP HEAD request |
464 _OPENER.open(self.normalize_url(extid), # XXX HTTP HEAD request |
|
465 timeout=self.http_timeout) |
458 except urllib2.HTTPError as ex: |
466 except urllib2.HTTPError as ex: |
459 if ex.code == 404: |
467 if ex.code == 404: |
460 return True |
468 return True |
461 elif extid.startswith('file://'): |
469 elif extid.startswith('file://'): |
462 return exists(extid[7:]) |
470 return exists(extid[7:]) |