server/sources/datafeed.py
changeset 9205 ea32e964fbf8
parent 9182 75493f6ca586
child 9224 5c6d6a9f4fc8
equal deleted inserted replaced
9149:31ed9dd946d1 9205:ea32e964fbf8
    76          {'type': 'time',
    76          {'type': 'time',
    77           'default': '10d',
    77           'default': '10d',
    78           'help': ('Time before logs from datafeed imports are deleted.'),
    78           'help': ('Time before logs from datafeed imports are deleted.'),
    79           'group': 'datafeed-source', 'level': 2,
    79           'group': 'datafeed-source', 'level': 2,
    80           }),
    80           }),
       
    81         ('http-timeout',
       
    82          {'type': 'time',
       
    83           'default': '1min',
       
    84           'help': ('Timeout of HTTP GET requests, when synchronizing a source.'),
       
    85           'group': 'datafeed-source', 'level': 2,
       
    86           }),
    81         )
    87         )
    82 
    88 
    83     def check_config(self, source_entity):
    89     def check_config(self, source_entity):
    84         """check configuration of source entity"""
    90         """check configuration of source entity"""
    85         typed_config = super(DataFeedSource, self).check_config(source_entity)
    91         typed_config = super(DataFeedSource, self).check_config(source_entity)
    99         properly typed with defaults set
   105         properly typed with defaults set
   100         """
   106         """
   101         super(DataFeedSource, self).update_config(source_entity, typed_config)
   107         super(DataFeedSource, self).update_config(source_entity, typed_config)
   102         self.synchro_interval = timedelta(seconds=typed_config['synchronization-interval'])
   108         self.synchro_interval = timedelta(seconds=typed_config['synchronization-interval'])
   103         self.max_lock_lifetime = timedelta(seconds=typed_config['max-lock-lifetime'])
   109         self.max_lock_lifetime = timedelta(seconds=typed_config['max-lock-lifetime'])
       
   110         self.http_timeout = typed_config['http-timeout']
   104 
   111 
   105     def init(self, activated, source_entity):
   112     def init(self, activated, source_entity):
   106         super(DataFeedSource, self).init(activated, source_entity)
   113         super(DataFeedSource, self).init(activated, source_entity)
   107         self.parser_id = source_entity.parser
   114         self.parser_id = source_entity.parser
   108         self.load_mapping(source_entity._cw)
   115         self.load_mapping(source_entity._cw)
   436 
   443 
   437     def parse(self, url):
   444     def parse(self, url):
   438         if url.startswith('http'):
   445         if url.startswith('http'):
   439             url = self.normalize_url(url)
   446             url = self.normalize_url(url)
   440             self.source.info('GET %s', url)
   447             self.source.info('GET %s', url)
   441             stream = _OPENER.open(url)
   448             stream = _OPENER.open(url, timeout=self.http_timeout)
   442         elif url.startswith('file://'):
   449         elif url.startswith('file://'):
   443             stream = open(url[7:])
   450             stream = open(url[7:])
   444         else:
   451         else:
   445             stream = StringIO.StringIO(url)
   452             stream = StringIO.StringIO(url)
   446         return self.parse_etree(etree.parse(stream).getroot())
   453         return self.parse_etree(etree.parse(stream).getroot())
   452         raise NotImplementedError
   459         raise NotImplementedError
   453 
   460 
   454     def is_deleted(self, extid, etype, eid):
   461     def is_deleted(self, extid, etype, eid):
   455         if extid.startswith('http'):
   462         if extid.startswith('http'):
   456             try:
   463             try:
   457                 _OPENER.open(self.normalize_url(extid)) # XXX HTTP HEAD request
   464                 _OPENER.open(self.normalize_url(extid), # XXX HTTP HEAD request
       
   465                              timeout=self.http_timeout)
   458             except urllib2.HTTPError as ex:
   466             except urllib2.HTTPError as ex:
   459                 if ex.code == 404:
   467                 if ex.code == 404:
   460                     return True
   468                     return True
   461         elif extid.startswith('file://'):
   469         elif extid.startswith('file://'):
   462             return exists(extid[7:])
   470             return exists(extid[7:])