# HG changeset patch # User Julien Cristau # Date 1450101263 -3600 # Node ID ca8321b32392ad7db4efc32ef71121f0646d6c3b # Parent 50ed87bc4cc67666396855e7c2489d59f983b85b [dataimport] separate entities table from other metadata in SQLGenObjectStore 'entities' needs to be updated first as every other table has references to its rows. Remove use of thread-local storage in the process. related to #9177565. diff -r 50ed87bc4cc6 -r ca8321b32392 dataimport/pgstore.py --- a/dataimport/pgstore.py Mon Dec 14 14:53:02 2015 +0100 +++ b/dataimport/pgstore.py Mon Dec 14 14:54:23 2015 +0100 @@ -17,7 +17,6 @@ # with CubicWeb. If not, see . """Postgres specific store""" -import threading import warnings import cPickle import os.path as osp @@ -275,7 +274,6 @@ def __init__(self, system_source, schema, dump_output_dir=None): self.system_source = system_source - self._sql = threading.local() # Explicitely backport attributes from system source self._storage_handler = self.system_source._storage_handler self.preprocess_entity = self.system_source.preprocess_entity @@ -290,8 +288,7 @@ spcfrom = system_source.dbhelper.dbapi_module.support_copy_from self.support_copy_from = spcfrom self.dbencoding = system_source.dbhelper.dbencoding - # initialize thread-local data for main thread - self.init_thread_locals() + self.init_statement_lists() self._inlined_rtypes_cache = {} self._fill_inlined_rtypes_cache(schema) self.schema = schema @@ -304,20 +301,20 @@ if rschema.inlined: cache[eschema.type] = SQL_PREFIX + rschema.type - def init_thread_locals(self): - """initializes thread-local data""" - self._sql.entities = defaultdict(list) - self._sql.relations = {} - self._sql.inlined_relations = {} + def init_statement_lists(self): + self._sql_entities = defaultdict(list) + self._sql_relations = {} + self._sql_inlined_relations = {} + self._sql_eids = defaultdict(list) # keep track, for each eid of the corresponding data dict - self._sql.eid_insertdicts = {} + self._sql_eid_insertdicts = {} def flush(self): print 'starting flush' - _entities_sql = self._sql.entities - _relations_sql = self._sql.relations - _inlined_relations_sql = self._sql.inlined_relations - _insertdicts = self._sql.eid_insertdicts + _entities_sql = self._sql_entities + _relations_sql = self._sql_relations + _inlined_relations_sql = self._sql_inlined_relations + _insertdicts = self._sql_eid_insertdicts try: # try, for each inlined_relation, to find if we're also creating # the host entity (i.e. the subject of the relation). @@ -345,7 +342,8 @@ new_datalist.append(data) _inlined_relations_sql[statement] = new_datalist _execmany_thread(self.system_source.get_connection, - _entities_sql.items() + self._sql_eids.items() + + _entities_sql.items() + _relations_sql.items() + _inlined_relations_sql.items(), dump_output_dir=self.dump_output_dir, @@ -360,7 +358,7 @@ def add_relation(self, cnx, subject, rtype, object, inlined=False, **kwargs): if inlined: - _sql = self._sql.inlined_relations + _sql = self._sql_inlined_relations data = {'cw_eid': subject, SQL_PREFIX + rtype: object} subjtype = kwargs.get('subjtype') if subjtype is None: @@ -378,7 +376,7 @@ statement = self.sqlgen.update(SQL_PREFIX + subjtype, data, ['cw_eid']) else: - _sql = self._sql.relations + _sql = self._sql_relations data = {'eid_from': subject, 'eid_to': object} statement = self.sqlgen.insert('%s_relation' % rtype, data) if statement in _sql: @@ -396,17 +394,17 @@ if rtype not in attrs: attrs[rtype] = None sql = self.sqlgen.insert(SQL_PREFIX + entity.cw_etype, attrs) - self._sql.eid_insertdicts[entity.eid] = attrs + self._sql_eid_insertdicts[entity.eid] = attrs self._append_to_entities(sql, attrs) def _append_to_entities(self, sql, attrs): - self._sql.entities[sql].append(attrs) + self._sql_entities[sql].append(attrs) def _handle_insert_entity_sql(self, cnx, sql, attrs): # We have to overwrite the source given in parameters # as here, we directly use the system source attrs['asource'] = self.system_source.uri - self._append_to_entities(sql, attrs) + self._sql_eids[sql].append(attrs) def _handle_is_relation_sql(self, cnx, sql, attrs): self._append_to_entities(sql, attrs)