[multi-sources-removal] Drop deleted_entities system table and entities.mtime column
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Tue, 21 Jan 2014 18:20:28 +0100
changeset 9450 af4b93bc38a5
parent 9449 287a05ec7ab1
child 9451 c83a8ecb9bf5
[multi-sources-removal] Drop deleted_entities system table and entities.mtime column, since they were only used by the entities_modified_since API of the repository, which has been dropped. Along with them, remove the multi-sources-etypes configuration variable and some SQL queries executed at modification/deletion time of entities. Bon vent ! Related to #2919300
--- a/dataimport.py	Mon Jun 17 00:07:35 2013 +0200
+++ b/dataimport.py	Tue Jan 21 18:20:28 2014 +0100
@@ -1125,7 +1125,7 @@
             assert isinstance(extid, str)
             extid = b64encode(extid)
         attrs = {'type': entity.cw_etype, 'eid': entity.eid, 'extid': extid,
-                 'source': 'system', 'asource': source.uri, 'mtime': datetime.utcnow()}
+                 'source': 'system', 'asource': source.uri}
         self._handle_insert_entity_sql(session, self.sqlgen.insert('entities', attrs), attrs)
         # insert core relations: is, is_instance_of and cw_source
--- a/hooks/__init__.py	Mon Jun 17 00:07:35 2013 +0200
+++ b/hooks/__init__.py	Tue Jan 21 18:20:28 2014 +0100
@@ -39,10 +39,6 @@
                     'DELETE FROM transactions WHERE tx_time < %(time)s',
                     {'time': mindate})
-                # cleanup deleted entities
-                session.system_sql(
-                    'DELETE FROM deleted_entities WHERE dtime < %(time)s',
-                    {'time': mindate})
--- a/hooks/metadata.py	Mon Jun 17 00:07:35 2013 +0200
+++ b/hooks/metadata.py	Tue Jan 21 18:20:28 2014 +0100
@@ -204,8 +204,7 @@
             self._cw.system_sql('UPDATE entities SET eid=-eid WHERE eid=%(eid)s',
                                 {'eid': self.eidfrom})
             attrs = {'type': entity.cw_etype, 'eid': entity.eid, 'extid': None,
-                     'source': 'system', 'asource': 'system',
-                     'mtime': datetime.now()}
+                     'source': 'system', 'asource': 'system'}
             self._cw.system_sql(syssource.sqlgen.insert('entities', attrs), attrs)
             # register an operation to update repository/sources caches
             ChangeEntitySourceUpdateCaches(self._cw, entity=entity,
--- a/hooks/syncschema.py	Mon Jun 17 00:07:35 2013 +0200
+++ b/hooks/syncschema.py	Tue Jan 21 18:20:28 2014 +0100
@@ -297,8 +297,6 @@
         for eid, (etype, uri, extid, auri) in self.session.repo._type_source_cache.items():
             if etype == oldname:
                 self.session.repo._type_source_cache[eid] = (newname, uri, extid, auri)
-        sqlexec('UPDATE deleted_entities SET type=%(newname)s WHERE type=%(oldname)s',
-                {'newname': newname, 'oldname': oldname})
         # XXX transaction records
     def precommit_event(self):
--- a/misc/migration/3.11.0_Any.py	Mon Jun 17 00:07:35 2013 +0200
+++ b/misc/migration/3.11.0_Any.py	Tue Jan 21 18:20:28 2014 +0100
@@ -9,77 +9,3 @@
     add_attribute('CWSource', 'url')
     add_attribute('CWSource', 'parser')
     add_attribute('CWSource', 'latest_retrieval')
-    from cubicweb.server.sources.pyrorql import PyroRQLSource
-except ImportError:
-    pass
-    from os.path import join
-    # function to read old python mapping file
-    def load_mapping_file(source):
-        mappingfile = source.config['mapping-file']
-        mappingfile = join(source.repo.config.apphome, mappingfile)
-        mapping = {}
-        execfile(mappingfile, mapping)
-        for junk in ('__builtins__', '__doc__'):
-            mapping.pop(junk, None)
-        mapping.setdefault('support_relations', {})
-        mapping.setdefault('dont_cross_relations', set())
-        mapping.setdefault('cross_relations', set())
-        # do some basic checks of the mapping content
-        assert 'support_entities' in mapping, \
-               'mapping file should at least define support_entities'
-        assert isinstance(mapping['support_entities'], dict)
-        assert isinstance(mapping['support_relations'], dict)
-        assert isinstance(mapping['dont_cross_relations'], set)
-        assert isinstance(mapping['cross_relations'], set)
-        unknown = set(mapping) - set( ('support_entities', 'support_relations',
-                                       'dont_cross_relations', 'cross_relations') )
-        assert not unknown, 'unknown mapping attribute(s): %s' % unknown
-        # relations that are necessarily not crossed
-        for rtype in ('is', 'is_instance_of', 'cw_source'):
-            assert rtype not in mapping['dont_cross_relations'], \
-                   '%s relation should not be in dont_cross_relations' % rtype
-            assert rtype not in mapping['support_relations'], \
-                   '%s relation should not be in support_relations' % rtype
-        return mapping
-    # for now, only pyrorql sources have a mapping
-    for source in repo.sources_by_uri.itervalues():
-        if not isinstance(source, PyroRQLSource):
-            continue
-        sourceentity = session.entity_from_eid(source.eid)
-        mapping = load_mapping_file(source)
-        # write mapping as entities
-        print 'migrating map for', source
-        for etype, write in mapping['support_entities'].items():
-            create_entity('CWSourceSchemaConfig',
-                          cw_for_source=sourceentity,
-                          cw_schema=session.entity_from_eid(schema[etype].eid),
-                          options=write and u'write' or None,
-                          ask_confirm=False)
-        for rtype, write in mapping['support_relations'].items():
-            options = []
-            if write:
-                options.append(u'write')
-            if rtype in mapping['cross_relations']:
-                options.append(u'maycross')
-            create_entity('CWSourceSchemaConfig',
-                          cw_for_source=sourceentity,
-                          cw_schema=session.entity_from_eid(schema[rtype].eid),
-                          options=u':'.join(options) or None,
-                          ask_confirm=False)
-        for rtype in mapping['dont_cross_relations']:
-            create_entity('CWSourceSchemaConfig',
-                          cw_for_source=source,
-                          cw_schema=session.entity_from_eid(schema[rtype].eid),
-                          options=u'dontcross',
-                          ask_confirm=False)
-        # latest update time cwproperty is now a source attribute (latest_retrieval)
-        pkey = u'sources.%s.latest-update-time' % source.uri
-        rset = session.execute('Any V WHERE X is CWProperty, X value V, X pkey %(k)s',
-                               {'k': pkey})
-        timestamp = int(rset[0][0])
-        sourceentity.cw_set(latest_retrieval=datetime.fromtimestamp(timestamp))
-        session.execute('DELETE CWProperty X WHERE X pkey %(k)s', {'k': pkey})
--- a/misc/migration/3.14.4_Any.py	Mon Jun 17 00:07:35 2013 +0200
+++ b/misc/migration/3.14.4_Any.py	Tue Jan 21 18:20:28 2014 +0100
@@ -8,4 +8,3 @@
 sql('UPDATE entities SET asource = source WHERE asource is NULL')
 dbhelper.change_col_type(cursor, 'entities', 'asource', attrtype, False)
 dbhelper.change_col_type(cursor, 'entities', 'source', attrtype, False)
-dbhelper.change_col_type(cursor, 'deleted_entities', 'source', attrtype, False)
--- a/misc/migration/3.16.0_Any.py	Mon Jun 17 00:07:35 2013 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-for source in rql('CWSource X WHERE X type "pyrorql"').entities():
-    sconfig = source.dictconfig
-    nsid = sconfig.pop('pyro-ns-id', config.appid)
-    nshost = sconfig.pop('pyro-ns-host', '')
-    nsgroup = sconfig.pop('pyro-ns-group', ':cubicweb')
-    if nsgroup:
-        nsgroup += '.'
-    source.cw_set(url=u'pyro://%s/%s%s' % (nshost, nsgroup, nsid))
-    source.update_config(skip_unknown=True, **sconfig)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/misc/migration/3.19.0_Any.py	Tue Jan 21 18:20:28 2014 +0100
@@ -0,0 +1,2 @@
+sql('DROP TABLE "deleted_entities"')
+sql('ALTER TABLE "entities" DROP COLUMN "mtime"')
--- a/misc/migration/bootstrapmigration_repository.py	Mon Jun 17 00:07:35 2013 +0200
+++ b/misc/migration/bootstrapmigration_repository.py	Tue Jan 21 18:20:28 2014 +0100
@@ -223,11 +223,11 @@
 if applcubicwebversion < (3, 2, 2) and cubicwebversion >= (3, 2, 1):
     from base64 import b64encode
-    for table in ('entities', 'deleted_entities'):
-        for eid, extid in sql('SELECT eid, extid FROM %s WHERE extid is NOT NULL'
-                              % table, ask_confirm=False):
-            sql('UPDATE %s SET extid=%%(extid)s WHERE eid=%%(eid)s' % table,
-                {'extid': b64encode(extid), 'eid': eid}, ask_confirm=False)
+    for eid, extid in sql('SELECT eid, extid FROM entities '
+                          'WHERE extid is NOT NULL',
+                          ask_confirm=False):
+        sql('UPDATE entities SET extid=%(extid)s WHERE eid=%(eid)s',
+            {'extid': b64encode(extid), 'eid': eid}, ask_confirm=False)
 if applcubicwebversion < (3, 2, 0) and cubicwebversion >= (3, 2, 0):
--- a/server/repository.py	Mon Jun 17 00:07:35 2013 +0200
+++ b/server/repository.py	Tue Jan 21 18:20:28 2014 +0100
@@ -1137,9 +1137,7 @@
         """delete system information on deletion of an entity:
         * delete all remaining relations from/to this entity
-        * call delete info on the system source which will transfer record from
-          the entities table to the deleted_entities table
+        * call delete info on the system source
         When scleanup is specified, it's expected to be the source's eid, in
         which case we'll specify the target's relation source so that this
--- a/server/schemaserial.py	Mon Jun 17 00:07:35 2013 +0200
+++ b/server/schemaserial.py	Tue Jan 21 18:20:28 2014 +0100
@@ -133,11 +133,6 @@
             sqlexec('UPDATE entities SET type=%(n)s WHERE type=%(x)s',
                     {'x': etype, 'n': netype})
-            try:
-                sqlexec('UPDATE deleted_entities SET type=%(n)s WHERE type=%(x)s',
-                        {'x': etype, 'n': netype})
-            except Exception:
-                pass
             tocleanup = [eid]
             tocleanup += (eid for eid, cached in repo._type_source_cache.iteritems()
                           if etype == cached[0])
--- a/server/sources/native.py	Mon Jun 17 00:07:35 2013 +0200
+++ b/server/sources/native.py	Tue Jan 21 18:20:28 2014 +0100
@@ -112,20 +112,6 @@
     return ','.join(sql), varmap
-def _modified_sql(table, etypes):
-    # XXX protect against sql injection
-    if len(etypes) > 1:
-        restr = 'type IN (%s)' % ','.join("'%s'" % etype for etype in etypes)
-    else:
-        restr = "type='%s'" % etypes[0]
-    if table == 'entities':
-        attr = 'mtime'
-    else:
-        attr = 'dtime'
-    return 'SELECT type, eid FROM %s WHERE %s AND %s > %%(time)s' % (
-        table, restr, attr)
 def sql_or_clauses(sql, clauses):
     select, restr = sql.split(' WHERE ', 1)
     restrclauses = restr.split(' AND ')
@@ -138,6 +124,7 @@
         restr = '(%s)' % ' OR '.join(clauses)
     return '%s WHERE %s' % (select, restr)
 def rdef_table_column(rdef):
     """return table and column used to store the given relation definition in
     the database
@@ -145,6 +132,7 @@
     return (SQL_PREFIX + str(rdef.subject),
             SQL_PREFIX + str(rdef.rtype))
 def rdef_physical_info(dbhelper, rdef):
     """return backend type and a boolean flag if NULL values should be allowed
     for a given relation definition
@@ -299,8 +287,6 @@
         self._eid_creation_cnx = None
         # (etype, attr) / storage mapping
         self._storages = {}
-        # entity types that may be used by other multi-sources instances
-        self.multisources_etypes = set(repo.config['multi-sources-etypes'])
         # XXX no_sqlite_wrap trick since we've a sqlite locking pb when
         # running unittest_multisources with the wrapping below
         if self.dbdriver == 'sqlite' and \
@@ -968,9 +954,8 @@
             if extid is not None:
                 assert isinstance(extid, str)
                 extid = b64encode(extid)
-            uri = 'system'
             attrs = {'type': entity.cw_etype, 'eid': entity.eid, 'extid': extid,
-                     'source': uri, 'asource': source.uri, 'mtime': datetime.utcnow()}
+                     'source': 'system', 'asource': source.uri}
             self._handle_insert_entity_sql(session, self.sqlgen.insert('entities', attrs), attrs)
             # insert core relations: is, is_instance_of and cw_source
@@ -999,10 +984,6 @@
             # reindex the entity only if this query is updating at least
             # one indexable attribute
             self.index_entity(session, entity=entity)
-        # update entities.mtime.
-        # XXX Only if entity.cw_etype in self.multisources_etypes?
-        attrs = {'eid': entity.eid, 'mtime': datetime.utcnow()}
-        self.doexec(session, self.sqlgen.update('entities', attrs, ['eid']), attrs)
     def delete_info_multi(self, session, entities, uri):
         """delete system information on deletion of a list of entities with the
@@ -1010,43 +991,10 @@
         * update the fti
         * remove record from the `entities` table
-        * transfer it to the `deleted_entities`
         self.fti_unindex_entities(session, entities)
         attrs = {'eid': '(%s)' % ','.join([str(_e.eid) for _e in entities])}
         self.doexec(session, self.sqlgen.delete_many('entities', attrs), attrs)
-        if entities[0].__regid__ not in self.multisources_etypes:
-            return
-        attrs = {'type': entities[0].__regid__,
-                 'source': uri, 'dtime': datetime.utcnow()}
-        for entity in entities:
-            extid = entity.cw_metainformation()['extid']
-            if extid is not None:
-                assert isinstance(extid, str), type(extid)
-                extid = b64encode(extid)
-            attrs.update({'eid': entity.eid, 'extid': extid})
-            self.doexec(session, self.sqlgen.insert('deleted_entities', attrs), attrs)
-    def modified_entities(self, session, etypes, mtime):
-        """return a 2-uple:
-        * list of (etype, eid) of entities of the given types which have been
-          modified since the given timestamp (actually entities whose full text
-          index content has changed)
-        * list of (etype, eid) of entities of the given types which have been
-          deleted since the given timestamp
-        """
-        for etype in etypes:
-            if not etype in self.multisources_etypes:
-                self.error('%s not listed as a multi-sources entity types. '
-                              'Modify your configuration' % etype)
-                self.multisources_etypes.add(etype)
-        modsql = _modified_sql('entities', etypes)
-        cursor = self.doexec(session, modsql, {'time': mtime})
-        modentities = cursor.fetchall()
-        delsql = _modified_sql('deleted_entities', etypes)
-        cursor = self.doexec(session, delsql, {'time': mtime})
-        delentities = cursor.fetchall()
-        return modentities, delentities
     # undo support #############################################################
@@ -1294,10 +1242,6 @@
         self.doexec(session, sql, action.changes)
         # restore record in entities (will update fti if needed)
         self.add_info(session, entity, self, None, True)
-        # remove record from deleted_entities if entity's type is multi-sources
-        if entity.cw_etype in self.multisources_etypes:
-            self.doexec(session,
-                        'DELETE FROM deleted_entities WHERE eid=%s' % eid)
         self.repo.hm.call_hooks('after_add_entity', session, entity=entity)
         return errors
@@ -1499,24 +1443,11 @@
   type VARCHAR(64) NOT NULL,
   source VARCHAR(128) NOT NULL,
   asource VARCHAR(128) NOT NULL,
-  mtime %s NOT NULL,
   extid VARCHAR(256)
 CREATE INDEX entities_type_idx ON entities(type);;
-CREATE INDEX entities_mtime_idx ON entities(mtime);;
 CREATE INDEX entities_extid_idx ON entities(extid);;
-CREATE TABLE deleted_entities (
-  type VARCHAR(64) NOT NULL,
-  source VARCHAR(128) NOT NULL,
-  dtime %s NOT NULL,
-  extid VARCHAR(256)
-CREATE INDEX deleted_entities_type_idx ON deleted_entities(type);;
-CREATE INDEX deleted_entities_dtime_idx ON deleted_entities(dtime);;
-CREATE INDEX deleted_entities_extid_idx ON deleted_entities(extid);;
 CREATE TABLE transactions (
   tx_user INTEGER NOT NULL,
@@ -1555,7 +1486,7 @@
 CREATE INDEX tx_relation_actions_eid_to_idx ON tx_relation_actions(eid_to);;
 CREATE INDEX tx_relation_actions_tx_uuid_idx ON tx_relation_actions(tx_uuid);;
 """ % (helper.sql_create_sequence('entities_id_seq').replace(';', ';;'),
-       typemap['Datetime'], typemap['Datetime'], typemap['Datetime'],
+       typemap['Datetime'],
        typemap['Boolean'], typemap['Bytes'], typemap['Boolean'])
     if helper.backend_name == 'sqlite':
         # sqlite support the ON DELETE CASCADE syntax but do nothing
@@ -1575,7 +1506,6 @@
     return """
 DROP TABLE entities;
-DROP TABLE deleted_entities;
 DROP TABLE tx_entity_actions;
 DROP TABLE tx_relation_actions;
 DROP TABLE transactions;
@@ -1584,7 +1514,7 @@
 def grant_schema(user, set_owner=True):
     result = ''
-    for table in ('entities', 'deleted_entities', 'entities_id_seq',
+    for table in ('entities', 'entities_id_seq',
                   'transactions', 'tx_entity_actions', 'tx_relation_actions'):
         if set_owner:
             result = 'ALTER TABLE %s OWNER TO %s;\n' % (table, user)
@@ -1731,7 +1661,6 @@
     def get_tables(self):
         non_entity_tables = ['entities',
-                             'deleted_entities',
--- a/server/test/unittest_repository.py	Mon Jun 17 00:07:35 2013 +0200
+++ b/server/test/unittest_repository.py	Tue Jan 21 18:20:28 2014 +0100
@@ -695,11 +695,8 @@
         self.repo.add_info(self.session, entity, self.repo.system_source)
         cu = self.session.system_sql('SELECT * FROM entities WHERE eid = -1')
         data = cu.fetchall()
-        self.assertIsInstance(data[0][4], datetime)
-        data[0] = list(data[0])
-        data[0][4] = None
         self.assertEqual(tuplify(data), [(-1, 'Personne', 'system', 'system',
-                                          None, None)])
+                                          None)])
         self.repo.delete_info(self.session, entity, 'system', None)
         cu = self.session.system_sql('SELECT * FROM entities WHERE eid = -1')