[datafeed] gives information about moved entities to the parser
Also, no longer support arbitrary parameters at parser initialization.
Related to #5414754
--- a/cubicweb/server/sources/datafeed.py Wed Jul 01 08:45:29 2015 +0200
+++ b/cubicweb/server/sources/datafeed.py Wed Jul 01 08:53:29 2015 +0200
@@ -235,7 +235,8 @@
source_uris = self.source_uris(cnx)
try:
parser = self._get_parser(cnx, import_log=importlog,
- source_uris=source_uris)
+ source_uris=source_uris,
+ moved_uris=self.moved_uris(cnx))
except ObjectNotFound:
return {}
if parser.process_urls(self.urls, raise_on_error):
@@ -293,6 +294,10 @@
return dict((self.decode_extid(uri), (eid, type))
for uri, eid, type in cnx.system_sql(sql, {'source': self.uri}).fetchall())
+ def moved_uris(self, cnx):
+ sql = 'SELECT extid FROM moved_entities'
+ return set(self.decode_extid(uri) for uri, in cnx.system_sql(sql).fetchall())
+
def init_import_log(self, cnx, import_log_eid=None, **kwargs):
if import_log_eid is None:
import_log = cnx.create_entity('CWDataImport', cw_import_of=self,
@@ -309,13 +314,16 @@
class DataFeedParser(AppObject):
__registry__ = 'parsers'
- def __init__(self, cnx, source, import_log=None, source_uris=None, **kwargs):
+ def __init__(self, cnx, source, import_log=None, source_uris=None, moved_uris=None, **kwargs):
super(DataFeedParser, self).__init__(cnx, **kwargs)
self.source = source
self.import_log = import_log
if source_uris is None:
source_uris = {}
self.source_uris = source_uris
+ if moved_uris is None:
+ moved_uris = ()
+ self.moved_uris = moved_uris
self.stats = {'created': set(), 'updated': set(), 'checked': set()}
def normalize_url(self, url):