190 for url in urls: |
190 for url in urls: |
191 self.info('pulling data from %s', url) |
191 self.info('pulling data from %s', url) |
192 try: |
192 try: |
193 if parser.process(url, raise_on_error): |
193 if parser.process(url, raise_on_error): |
194 error = True |
194 error = True |
195 except Exception, exc: |
195 except IOError, exc: |
196 if raise_on_error: |
196 if raise_on_error: |
197 raise |
197 raise |
198 self.error('could not pull data while processing %s: %s', |
198 self.error('could not pull data while processing %s: %s', |
199 url, exc) |
199 url, exc) |
|
200 error = True |
|
201 except Exception, exc: |
|
202 if raise_on_error: |
|
203 raise |
|
204 self.exception('error while processing %s: %s', |
|
205 url, exc) |
200 error = True |
206 error = True |
201 return error |
207 return error |
202 |
208 |
203 def before_entity_insertion(self, session, lid, etype, eid, sourceparams): |
209 def before_entity_insertion(self, session, lid, etype, eid, sourceparams): |
204 """called by the repository when an eid has been attributed for an |
210 """called by the repository when an eid has been attributed for an |
264 # name, call extid2eid on that source so entity will be properly seen as |
270 # name, call extid2eid on that source so entity will be properly seen as |
265 # coming from this source |
271 # coming from this source |
266 source = self._cw.repo.sources_by_uri.get( |
272 source = self._cw.repo.sources_by_uri.get( |
267 sourceparams.pop('cwsource', None), self.source) |
273 sourceparams.pop('cwsource', None), self.source) |
268 sourceparams['parser'] = self |
274 sourceparams['parser'] = self |
269 eid = source.extid2eid(str(uri), etype, self._cw, |
275 try: |
270 sourceparams=sourceparams) |
276 eid = source.extid2eid(str(uri), etype, self._cw, |
|
277 sourceparams=sourceparams) |
|
278 except ValidationError, ex: |
|
279 self.source.error('error while creating %s: %s', etype, ex) |
|
280 return None |
271 if eid < 0: |
281 if eid < 0: |
272 # entity has been moved away from its original source |
282 # entity has been moved away from its original source |
273 # |
283 # |
274 # Don't give etype to entity_from_eid so we get UnknownEid if the |
284 # Don't give etype to entity_from_eid so we get UnknownEid if the |
275 # entity has been removed |
285 # entity has been removed |