128 self.public_config['use-cwuri-as-url'] = self.use_cwuri_as_url |
128 self.public_config['use-cwuri-as-url'] = self.use_cwuri_as_url |
129 |
129 |
130 def init(self, activated, source_entity): |
130 def init(self, activated, source_entity): |
131 super(DataFeedSource, self).init(activated, source_entity) |
131 super(DataFeedSource, self).init(activated, source_entity) |
132 self.parser_id = source_entity.parser |
132 self.parser_id = source_entity.parser |
133 self.load_mapping(source_entity._cw) |
|
134 |
133 |
135 def _get_parser(self, cnx, **kwargs): |
134 def _get_parser(self, cnx, **kwargs): |
136 if self.parser_id is None: |
135 if self.parser_id is None: |
137 self.warning('No parser defined on source %r', self) |
136 self.warning('No parser defined on source %r', self) |
138 raise ObjectNotFound() |
137 raise ObjectNotFound() |
139 return self.repo.vreg['parsers'].select( |
138 return self.repo.vreg['parsers'].select( |
140 self.parser_id, cnx, source=self, **kwargs) |
139 self.parser_id, cnx, source=self, **kwargs) |
141 |
|
142 def load_mapping(self, cnx): |
|
143 self.mapping = {} |
|
144 self.mapping_idx = {} |
|
145 try: |
|
146 parser = self._get_parser(cnx) |
|
147 except (RegistryNotFound, ObjectNotFound): |
|
148 return # no parser yet, don't go further |
|
149 self._load_mapping(cnx, parser=parser) |
|
150 |
|
151 def add_schema_config(self, schemacfg, checkonly=False, parser=None): |
|
152 """added CWSourceSchemaConfig, modify mapping accordingly""" |
|
153 if parser is None: |
|
154 parser = self._get_parser(schemacfg._cw) |
|
155 parser.add_schema_config(schemacfg, checkonly) |
|
156 |
|
157 def del_schema_config(self, schemacfg, checkonly=False, parser=None): |
|
158 """deleted CWSourceSchemaConfig, modify mapping accordingly""" |
|
159 if parser is None: |
|
160 parser = self._get_parser(schemacfg._cw) |
|
161 parser.del_schema_config(schemacfg, checkonly) |
|
162 |
140 |
163 def fresh(self): |
141 def fresh(self): |
164 if self.latest_retrieval is None: |
142 if self.latest_retrieval is None: |
165 return False |
143 return False |
166 return datetime.now(tz=utc) < (self.latest_retrieval + self.synchro_interval) |
144 return datetime.now(tz=utc) < (self.latest_retrieval + self.synchro_interval) |
327 req = Request(url) |
305 req = Request(url) |
328 return _OPENER.open(req, timeout=self.source.http_timeout) |
306 return _OPENER.open(req, timeout=self.source.http_timeout) |
329 |
307 |
330 # url is probably plain content |
308 # url is probably plain content |
331 return URLLibResponseAdapter(BytesIO(url.encode('ascii')), url) |
309 return URLLibResponseAdapter(BytesIO(url.encode('ascii')), url) |
332 |
|
333 def add_schema_config(self, schemacfg, checkonly=False): |
|
334 """added CWSourceSchemaConfig, modify mapping accordingly""" |
|
335 msg = schemacfg._cw._("this parser doesn't use a mapping") |
|
336 raise ValidationError(schemacfg.eid, {None: msg}) |
|
337 |
|
338 def del_schema_config(self, schemacfg, checkonly=False): |
|
339 """deleted CWSourceSchemaConfig, modify mapping accordingly""" |
|
340 msg = schemacfg._cw._("this parser doesn't use a mapping") |
|
341 raise ValidationError(schemacfg.eid, {None: msg}) |
|
342 |
310 |
343 def process_urls(self, urls, raise_on_error=False): |
311 def process_urls(self, urls, raise_on_error=False): |
344 error = False |
312 error = False |
345 for url in urls: |
313 for url in urls: |
346 self.info('pulling data from %s', url) |
314 self.info('pulling data from %s', url) |