[repository] Drop deprecated extid2eid API and friends
authorSylvain Thénault <sylvain.thenault@logilab.fr>
Fri, 30 Sep 2016 17:36:40 +0200
changeset 11756 60fed6272771
parent 11755 96ced95e4002
child 11757 e845746b4d3c
[repository] Drop deprecated extid2eid API and friends This will break cwxmlparser-based sources. They should be rewritten using a specific parser, based on xml representation or on rqlio. This is harsh but allows a major cleanup of the code base. Furthermore, it's necessary for asource/extid handling in the entities table which is costly for most apps that don't care at all about that... In this cset, delete: * all extid2eid methods * repo._extid_cache handling * [before/after]_entity_insertion source callback * the cwxmlparser and update related tests, notably unittest_datafeed where 'repull' testing has been removed, since it's now handled by the dataimport API and should not be retested there. Related to #15538288 Closes #15538383
cubicweb/devtools/__init__.py
cubicweb/devtools/fake.py
cubicweb/hooks/metadata.py
cubicweb/server/repository.py
cubicweb/server/sources/__init__.py
cubicweb/server/sources/datafeed.py
cubicweb/server/sources/native.py
cubicweb/server/test/unittest_datafeed.py
cubicweb/sobjects/cwxmlparser.py
cubicweb/sobjects/services.py
cubicweb/sobjects/test/unittest_cwxmlparser.py
cubicweb/web/test/unittest_views_cwsources.py
cubicweb/web/views/debug.py
--- a/cubicweb/devtools/__init__.py	Fri Sep 30 17:36:02 2016 +0200
+++ b/cubicweb/devtools/__init__.py	Fri Sep 30 17:36:40 2016 +0200
@@ -127,7 +127,6 @@
         for cnxset in repo.cnxsets:
             cnxset.reconnect()
         repo._type_source_cache = {}
-        repo._extid_cache = {}
         repo.querier._rql_cache = {}
         repo.system_source.reset_caches()
         repo._needs_refresh = False
--- a/cubicweb/devtools/fake.py	Fri Sep 30 17:36:02 2016 +0200
+++ b/cubicweb/devtools/fake.py	Fri Sep 30 17:36:40 2016 +0200
@@ -169,10 +169,11 @@
     def running_hooks_ops(self):
         yield
 
+
 class FakeRepo(object):
     querier = None
+
     def __init__(self, schema, vreg=None, config=None):
-        self.extids = {}
         self.eids = {}
         self._count = 0
         self.schema = schema
@@ -183,20 +184,6 @@
     def internal_session(self):
         return FakeSession(self)
 
-    def extid2eid(self, source, extid, etype, cnx, insert=True):
-        try:
-            return self.extids[extid]
-        except KeyError:
-            if not insert:
-                return None
-            self._count += 1
-            eid = self._count
-            entity = source.before_entity_insertion(cnx, extid, etype, eid)
-            self.extids[extid] = eid
-            self.eids[eid] = extid
-            source.after_entity_insertion(cnx, extid, entity)
-            return eid
-
 
 class FakeSource(object):
     dbhelper = get_db_helper('sqlite')
--- a/cubicweb/hooks/metadata.py	Fri Sep 30 17:36:02 2016 +0200
+++ b/cubicweb/hooks/metadata.py	Fri Sep 30 17:36:40 2016 +0200
@@ -166,7 +166,6 @@
         extid = entity.cw_metainformation()['extid']
         repo._type_source_cache[entity.eid] = (
             entity.cw_etype, None, self.newsource.uri)
-        repo._extid_cache[extid] = -entity.eid
 
 
 class ChangeEntitySourceDeleteHook(MetaDataHook):
--- a/cubicweb/server/repository.py	Fri Sep 30 17:36:02 2016 +0200
+++ b/cubicweb/server/repository.py	Fri Sep 30 17:36:40 2016 +0200
@@ -183,7 +183,6 @@
         # cache eid -> (type, extid, actual source)
         self._type_source_cache = {}
         # cache extid -> eid
-        self._extid_cache = {}
         # open some connection sets
         if config.init_cnxset_pool:
             self.init_cnxset_pool()
@@ -732,13 +731,11 @@
 
     def clear_caches(self, eids):
         etcache = self._type_source_cache
-        extidcache = self._extid_cache
         rqlcache = self.querier._rql_cache
         for eid in eids:
             try:
                 etype, extid, auri = etcache.pop(int(eid))  # may be a string in some cases
                 rqlcache.pop(('%s X WHERE X eid %s' % (etype, eid),), None)
-                extidcache.pop(extid, None)
             except KeyError:
                 etype = None
             rqlcache.pop(('Any X WHERE X eid %s' % eid,), None)
@@ -763,73 +760,6 @@
             args[key] = int(args[key])
         return tuple(cachekey)
 
-    @deprecated('[3.22] use the new store API')
-    def extid2eid(self, source, extid, etype, cnx, insert=True,
-                  sourceparams=None):
-        """Return eid from a local id. If the eid is a negative integer, that
-        means the entity is known but has been copied back to the system source
-        hence should be ignored.
-
-        If no record is found, ie the entity is not known yet:
-
-        1. an eid is attributed
-
-        2. the source's :meth:`before_entity_insertion` method is called to
-           build the entity instance
-
-        3. unless source's :attr:`should_call_hooks` tell otherwise,
-          'before_add_entity' hooks are called
-
-        4. record is added into the system source
-
-        5. the source's :meth:`after_entity_insertion` method is called to
-           complete building of the entity instance
-
-        6. unless source's :attr:`should_call_hooks` tell otherwise,
-          'before_add_entity' hooks are called
-        """
-        try:
-            return self._extid_cache[extid]
-        except KeyError:
-            pass
-        eid = self.system_source.extid2eid(cnx, extid)
-        if eid is not None:
-            self._extid_cache[extid] = eid
-            self._type_source_cache[eid] = (etype, extid, source.uri)
-            return eid
-        if not insert:
-            return
-        # no link between extid and eid, create one
-        # write query, ensure connection's mode is 'write' so connections
-        # won't be released until commit/rollback
-        try:
-            eid = self.system_source.create_eid(cnx)
-            self._extid_cache[extid] = eid
-            self._type_source_cache[eid] = (etype, extid, source.uri)
-            entity = source.before_entity_insertion(
-                cnx, extid, etype, eid, sourceparams)
-            if source.should_call_hooks:
-                # get back a copy of operation for later restore if
-                # necessary, see below
-                pending_operations = cnx.pending_operations[:]
-                self.hm.call_hooks('before_add_entity', cnx, entity=entity)
-            self.add_info(cnx, entity, source, extid)
-            source.after_entity_insertion(cnx, extid, entity, sourceparams)
-            if source.should_call_hooks:
-                self.hm.call_hooks('after_add_entity', cnx, entity=entity)
-            return eid
-        except Exception:
-            # XXX do some cleanup manually so that the transaction has a
-            # chance to be commited, with simply this entity discarded
-            self._extid_cache.pop(extid, None)
-            self._type_source_cache.pop(eid, None)
-            if 'entity' in locals():
-                hook.CleanupDeletedEidsCacheOp.get_instance(cnx).add_data(entity.eid)
-                self.system_source.delete_info_multi(cnx, [entity])
-                if source.should_call_hooks:
-                    cnx.pending_operations = pending_operations
-            raise
-
     def add_info(self, cnx, entity, source, extid=None):
         """add type and source info for an eid into the system table,
         and index the entity with the full text index
@@ -885,7 +815,6 @@
             extid = None
         else:
             extid = source.get_extid(entity)
-            self._extid_cache[str(extid)] = entity.eid
         self._type_source_cache[entity.eid] = (entity.cw_etype, extid, source.uri)
         return extid
 
--- a/cubicweb/server/sources/__init__.py	Fri Sep 30 17:36:02 2016 +0200
+++ b/cubicweb/server/sources/__init__.py	Fri Sep 30 17:36:40 2016 +0200
@@ -64,9 +64,6 @@
 class AbstractSource(object):
     """an abstract class for sources"""
 
-    # boolean telling if modification hooks should be called when something is
-    # modified in this source
-    should_call_hooks = True
     # boolean telling if the repository should connect to this source during
     # migration
     connect_for_migration = True
@@ -258,25 +255,6 @@
 
     # external source api ######################################################
 
-    def before_entity_insertion(self, cnx, lid, etype, eid, sourceparams):
-        """called by the repository when an eid has been attributed for an
-        entity stored here but the entity has not been inserted in the system
-        table yet.
-
-        This method must return the an Entity instance representation of this
-        entity.
-        """
-        entity = self.repo.vreg['etypes'].etype_class(etype)(cnx)
-        entity.eid = eid
-        entity.cw_edited = EditedEntity(entity)
-        return entity
-
-    def after_entity_insertion(self, cnx, lid, entity, sourceparams):
-        """called by the repository after an entity stored here has been
-        inserted in the system table.
-        """
-        pass
-
     def _load_mapping(self, cnx, **kwargs):
         if not 'CWSourceSchemaConfig' in self.schema:
             self.warning('instance is not mapping ready')
@@ -408,13 +386,6 @@
         raise NotImplementedError(self)
 
 
-    @deprecated('[3.13] use extid2eid(source, value, etype, cnx, **kwargs)')
-    def extid2eid(self, value, etype, cnx, **kwargs):
-        return self.repo.extid2eid(self, value, etype, cnx, **kwargs)
-
-
-
-
 def source_adapter(source_type):
     try:
         return SOURCE_TYPES[source_type]
--- a/cubicweb/server/sources/datafeed.py	Fri Sep 30 17:36:02 2016 +0200
+++ b/cubicweb/server/sources/datafeed.py	Fri Sep 30 17:36:40 2016 +0200
@@ -24,7 +24,6 @@
 from datetime import datetime, timedelta
 from functools import partial
 
-from six import text_type
 from six.moves.urllib.parse import urlparse
 from six.moves.urllib.request import Request, build_opener, HTTPCookieProcessor
 from six.moves.urllib.error import HTTPError
@@ -35,8 +34,7 @@
 
 from logilab.common.deprecation import deprecated
 
-from cubicweb import RegistryNotFound, ObjectNotFound, ValidationError, UnknownEid, SourceException
-from cubicweb.server.repository import preprocess_inlined_relations
+from cubicweb import RegistryNotFound, ObjectNotFound, ValidationError, SourceException
 from cubicweb.server.sources import AbstractSource
 from cubicweb.appobject import AppObject
 
@@ -257,42 +255,6 @@
         cnx.commit()
         return stats
 
-    @deprecated('[3.21] use the new store API')
-    def before_entity_insertion(self, cnx, lid, etype, eid, sourceparams):
-        """called by the repository when an eid has been attributed for an
-        entity stored here but the entity has not been inserted in the system
-        table yet.
-
-        This method must return the an Entity instance representation of this
-        entity.
-        """
-        entity = super(DataFeedSource, self).before_entity_insertion(
-            cnx, lid, etype, eid, sourceparams)
-        entity.cw_edited['cwuri'] = lid.decode('utf-8')
-        entity.cw_edited.set_defaults()
-        sourceparams['parser'].before_entity_copy(entity, sourceparams)
-        return entity
-
-    @deprecated('[3.21] use the new store API')
-    def after_entity_insertion(self, cnx, lid, entity, sourceparams):
-        """called by the repository after an entity stored here has been
-        inserted in the system table.
-        """
-        relations = preprocess_inlined_relations(cnx, entity)
-        if cnx.is_hook_category_activated('integrity'):
-            entity.cw_edited.check(creation=True)
-        self.repo.system_source.add_entity(cnx, entity)
-        entity.cw_edited.saved = entity._cw_is_saved = True
-        sourceparams['parser'].after_entity_copy(entity, sourceparams)
-        # call hooks for inlined relations
-        call_hooks = self.repo.hm.call_hooks
-        if self.should_call_hooks:
-            for attr, value in relations:
-                call_hooks('before_add_relation', cnx,
-                           eidfrom=entity.eid, rtype=attr, eidto=value)
-                call_hooks('after_add_relation', cnx,
-                           eidfrom=entity.eid, rtype=attr, eidto=value)
-
     def source_uris(self, cnx):
         sql = 'SELECT extid, eid, type FROM entities WHERE asource=%(source)s'
         return dict((self.decode_extid(uri), (eid, type))
@@ -397,52 +359,6 @@
         msg = schemacfg._cw._("this parser doesn't use a mapping")
         raise ValidationError(schemacfg.eid, {None: msg})
 
-    @deprecated('[3.21] use the new store API')
-    def extid2entity(self, uri, etype, **sourceparams):
-        """Return an entity for the given uri. May return None if it should be
-        skipped.
-
-        If a `raise_on_error` keyword parameter is passed, a ValidationError
-        exception may be raised.
-        """
-        raise_on_error = sourceparams.pop('raise_on_error', False)
-        cnx = self._cw
-        # if cwsource is specified and repository has a source with the same
-        # name, call extid2eid on that source so entity will be properly seen as
-        # coming from this source
-        source_uri = sourceparams.pop('cwsource', None)
-        if source_uri is not None and source_uri != 'system':
-            source = cnx.repo.sources_by_uri.get(source_uri, self.source)
-        else:
-            source = self.source
-        sourceparams['parser'] = self
-        if isinstance(uri, text_type):
-            uri = uri.encode('utf-8')
-        try:
-            eid = cnx.repo.extid2eid(source, uri, etype, cnx,
-                                     sourceparams=sourceparams)
-        except ValidationError as ex:
-            if raise_on_error:
-                raise
-            self.source.critical('error while creating %s: %s', etype, ex)
-            self.import_log.record_error('error while creating %s: %s'
-                                         % (etype, ex))
-            return None
-        if eid < 0:
-            # entity has been moved away from its original source
-            #
-            # Don't give etype to entity_from_eid so we get UnknownEid if the
-            # entity has been removed
-            try:
-                entity = cnx.entity_from_eid(-eid)
-            except UnknownEid:
-                return None
-            self.notify_updated(entity)  # avoid later update from the source's data
-            return entity
-        if self.source_uris is not None:
-            self.source_uris.pop(str(uri), None)
-        return cnx.entity_from_eid(eid, etype)
-
     def process_urls(self, urls, raise_on_error=False):
         error = False
         for url in urls:
@@ -470,14 +386,6 @@
         """main callback: process the url"""
         raise NotImplementedError
 
-    @deprecated('[3.21] use the new store API')
-    def before_entity_copy(self, entity, sourceparams):
-        raise NotImplementedError
-
-    @deprecated('[3.21] use the new store API')
-    def after_entity_copy(self, entity, sourceparams):
-        self.stats['created'].add(entity.eid)
-
     def created_during_pull(self, entity):
         return entity.eid in self.stats['created']
 
--- a/cubicweb/server/sources/native.py	Fri Sep 30 17:36:02 2016 +0200
+++ b/cubicweb/server/sources/native.py	Fri Sep 30 17:36:40 2016 +0200
@@ -851,35 +851,6 @@
         res.append("system")
         return res
 
-    def extid2eid(self, cnx, extid):
-        """get eid from an external id. Return None if no record found."""
-        assert isinstance(extid, binary_type)
-        args = {'x': b64encode(extid).decode('ascii')}
-        cursor = self.doexec(cnx,
-                             'SELECT eid FROM entities WHERE extid=%(x)s',
-                             args)
-        # XXX testing rowcount cause strange bug with sqlite, results are there
-        #     but rowcount is 0
-        #if cursor.rowcount > 0:
-        try:
-            result = cursor.fetchone()
-            if result:
-                return result[0]
-        except Exception:
-            pass
-        cursor = self.doexec(cnx,
-                             'SELECT eid FROM moved_entities WHERE extid=%(x)s',
-                             args)
-        try:
-            result = cursor.fetchone()
-            if result:
-                # entity was moved to the system source, return negative
-                # number to tell the external source to ignore it
-                return -result[0]
-        except Exception:
-            pass
-        return None
-
     def _handle_is_relation_sql(self, cnx, sql, attrs):
         """ Handler for specific is_relation sql that may be
         overwritten in some stores"""
--- a/cubicweb/server/test/unittest_datafeed.py	Fri Sep 30 17:36:02 2016 +0200
+++ b/cubicweb/server/test/unittest_datafeed.py	Fri Sep 30 17:36:40 2016 +0200
@@ -22,6 +22,7 @@
 
 from cubicweb.devtools.testlib import CubicWebTC
 from cubicweb.server.sources import datafeed
+from cubicweb.dataimport.stores import NoHookRQLObjectStore, MetaGenerator
 
 
 class DataFeedTC(CubicWebTC):
@@ -37,15 +38,16 @@
     def base_parser(self, session):
         class AParser(datafeed.DataFeedParser):
             __regid__ = 'testparser'
+
             def process(self, url, raise_on_error=False):
-                entity = self.extid2entity('http://www.cubicweb.org/', 'Card',
-                                           item={'title': u'cubicweb.org',
-                                                 'content': u'the cw web site'},
-                                           raise_on_error=raise_on_error)
-                if not self.created_during_pull(entity):
-                    self.notify_updated(entity)
-            def before_entity_copy(self, entity, sourceparams):
-                entity.cw_edited.update(sourceparams['item'])
+                metagenerator = MetaGenerator(self._cw, source=self.source)
+                store = NoHookRQLObjectStore(self._cw, metagenerator)
+                store.prepare_insert_entity('Card',
+                                            cwuri=u'http://www.cubicweb.org/',
+                                            title=u'cubicweb.org',
+                                            content=u'the cw web site')
+                store.flush()
+                store.commit()
 
         with self.temporary_appobjects(AParser):
             if u'ô myfeed' in self.repo.sources_by_uri:
@@ -72,14 +74,11 @@
 
         with self.repo.internal_cnx() as cnx:
             with self.base_parser(cnx):
-                stats = dfsource.pull_data(cnx, force=True)
+                stats = dfsource.pull_data(cnx, force=True, raise_on_error=True)
                 cnx.commit()
                 # test import stats
                 self.assertEqual(sorted(stats), ['checked', 'created', 'updated'])
-                self.assertEqual(len(stats['created']), 1)
                 entity = cnx.execute('Card X').get_entity(0, 0)
-                self.assertIn(entity.eid, stats['created'])
-                self.assertEqual(stats['updated'], set())
                 # test imported entities
                 self.assertEqual(entity.title, 'cubicweb.org')
                 self.assertEqual(entity.content, 'the cw web site')
@@ -94,22 +93,8 @@
                 # test repo cache keys
                 self.assertEqual(self.repo._type_source_cache[entity.eid],
                                  ('Card', b'http://www.cubicweb.org/', u'ô myfeed'))
-                self.assertEqual(self.repo._extid_cache[b'http://www.cubicweb.org/'],
-                                 entity.eid)
-                # test repull
-                stats = dfsource.pull_data(cnx, force=True)
-                self.assertEqual(stats['created'], set())
-                self.assertEqual(stats['updated'], set((entity.eid,)))
-                # test repull with caches reseted
-                self.repo._type_source_cache.clear()
-                self.repo._extid_cache.clear()
-                stats = dfsource.pull_data(cnx, force=True)
-                self.assertEqual(stats['created'], set())
-                self.assertEqual(stats['updated'], set((entity.eid,)))
                 self.assertEqual(self.repo._type_source_cache[entity.eid],
                                  ('Card', b'http://www.cubicweb.org/', u'ô myfeed'))
-                self.assertEqual(self.repo._extid_cache[b'http://www.cubicweb.org/'],
-                                 entity.eid)
 
                 self.assertEqual(dfsource.source_uris(cnx),
                                  {b'http://www.cubicweb.org/': (entity.eid, 'Card')})
@@ -130,8 +115,6 @@
                              )
             self.assertEqual(self.repo._type_source_cache[entity.eid],
                              ('Card', b'http://www.cubicweb.org/', 'myrenamedfeed'))
-            self.assertEqual(self.repo._extid_cache[b'http://www.cubicweb.org/'],
-                             entity.eid)
 
             # test_delete_source
             cnx.execute('DELETE CWSource S WHERE S name "myrenamedfeed"')
--- a/cubicweb/sobjects/cwxmlparser.py	Fri Sep 30 17:36:02 2016 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,497 +0,0 @@
-# copyright 2010-2015 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
-# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
-#
-# This file is part of CubicWeb.
-#
-# CubicWeb is free software: you can redistribute it and/or modify it under the
-# terms of the GNU Lesser General Public License as published by the Free
-# Software Foundation, either version 2.1 of the License, or (at your option)
-# any later version.
-#
-# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
-# details.
-#
-# You should have received a copy of the GNU Lesser General Public License along
-# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
-"""datafeed parser for xml generated by cubicweb
-
-Example of mapping for CWEntityXMLParser::
-
-  {u'CWUser': {                                        # EntityType
-      (u'in_group', u'subject', u'link'): [            # (rtype, role, action)
-          (u'CWGroup', {u'linkattr': u'name'})],       #   -> rules = [(EntityType, options), ...]
-      (u'tags', u'object', u'link-or-create'): [       # (...)
-          (u'Tag', {u'linkattr': u'name'})],           #   -> ...
-      (u'use_email', u'subject', u'copy'): [           # (...)
-          (u'EmailAddress', {})]                       #   -> ...
-      }
-   }
-
-"""
-
-from datetime import datetime, time
-import urllib
-
-from six import text_type
-from six.moves.urllib.parse import urlparse, urlunparse, parse_qs, urlencode
-
-import pytz
-from logilab.common.date import todate, totime
-from logilab.common.textutils import splitstrip, text_to_dict
-from logilab.common.decorators import classproperty
-
-from yams.constraints import BASE_CONVERTERS
-from yams.schema import role_name as rn
-
-from cubicweb import ValidationError, RegistryException
-from cubicweb.view import Component
-from cubicweb.server.sources import datafeed
-from cubicweb.server.hook import match_rtype
-
-# XXX see cubicweb.cwvreg.YAMS_TO_PY
-# XXX see cubicweb.web.views.xmlrss.SERIALIZERS
-DEFAULT_CONVERTERS = BASE_CONVERTERS.copy()
-DEFAULT_CONVERTERS['String'] = text_type
-DEFAULT_CONVERTERS['Password'] = lambda x: x.encode('utf8')
-def convert_date(ustr):
-    return todate(datetime.strptime(ustr, '%Y-%m-%d'))
-DEFAULT_CONVERTERS['Date'] = convert_date
-def convert_datetime(ustr):
-    if '.' in ustr: # assume %Y-%m-%d %H:%M:%S.mmmmmm
-        ustr = ustr.split('.', 1)[0]
-    return datetime.strptime(ustr, '%Y-%m-%d %H:%M:%S')
-DEFAULT_CONVERTERS['Datetime'] = convert_datetime
-# XXX handle timezone, though this will be enough as TZDatetime are
-# serialized without time zone by default (UTC time). See
-# cw.web.views.xmlrss.SERIALIZERS.
-def convert_tzdatetime(ustr):
-    date = convert_datetime(ustr)
-    date = date.replace(tzinfo=pytz.utc)
-    return date
-DEFAULT_CONVERTERS['TZDatetime'] = convert_tzdatetime
-def convert_time(ustr):
-    return totime(datetime.strptime(ustr, '%H:%M:%S'))
-DEFAULT_CONVERTERS['Time'] = convert_time
-DEFAULT_CONVERTERS['TZTime'] = convert_time
-def convert_interval(ustr):
-    return time(seconds=int(ustr))
-DEFAULT_CONVERTERS['Interval'] = convert_interval
-
-def extract_typed_attrs(eschema, stringdict, converters=DEFAULT_CONVERTERS):
-    typeddict = {}
-    for rschema in eschema.subject_relations():
-        if rschema.final and rschema in stringdict:
-            if rschema in ('eid', 'cwuri'):  # XXX really omit cwuri?
-                continue
-            attrtype = eschema.destination(rschema)
-            value = stringdict[rschema]
-            if value is not None:
-                value = converters[attrtype](value)
-            typeddict[rschema.type] = value
-    return typeddict
-
-def rtype_role_rql(rtype, role):
-    if role == 'object':
-        return 'Y %s X WHERE X eid %%(x)s' % rtype
-    else:
-        return 'X %s Y WHERE X eid %%(x)s' % rtype
-
-
-class CWEntityXMLParser(datafeed.DataFeedXMLParser):
-    """datafeed parser for the 'xml' entity view
-
-    Most of the logic is delegated to the following components:
-
-    * an "item builder" component, turning an etree xml node into a specific
-      python dictionary representing an entity
-
-    * "action" components, selected given an entity, a relation and its role in
-      the relation, and responsible to link the entity to given related items
-      (eg dictionary)
-
-    So the parser is only doing the gluing service and the connection to the
-    source.
-    """
-    __regid__ = 'cw.entityxml'
-
-    def __init__(self, *args, **kwargs):
-        super(CWEntityXMLParser, self).__init__(*args, **kwargs)
-        self._parsed_urls = {}
-        self._processed_entities = set()
-
-    def select_linker(self, action, rtype, role, entity=None):
-        try:
-            return self._cw.vreg['components'].select(
-                'cw.entityxml.action.%s' % action, self._cw, entity=entity,
-                rtype=rtype, role=role, parser=self)
-        except RegistryException:
-            raise RegistryException('Unknown action %s' % action)
-
-    def list_actions(self):
-        reg = self._cw.vreg['components']
-        return sorted(clss[0].action for rid, clss in reg.items()
-                      if rid.startswith('cw.entityxml.action.'))
-
-    # mapping handling #########################################################
-
-    def add_schema_config(self, schemacfg, checkonly=False):
-        """added CWSourceSchemaConfig, modify mapping accordingly"""
-        _ = self._cw._
-        try:
-            rtype = schemacfg.schema.rtype.name
-        except AttributeError:
-            msg = _("entity and relation types can't be mapped, only attributes "
-                    "or relations")
-            raise ValidationError(schemacfg.eid, {rn('cw_for_schema', 'subject'): msg})
-        if schemacfg.options:
-            options = text_to_dict(schemacfg.options)
-        else:
-            options = {}
-        try:
-            role = options.pop('role')
-            if role not in ('subject', 'object'):
-                raise KeyError
-        except KeyError:
-            msg = _('"role=subject" or "role=object" must be specified in options')
-            raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg})
-        try:
-            action = options.pop('action')
-            linker = self.select_linker(action, rtype, role)
-            linker.check_options(options, schemacfg.eid)
-        except KeyError:
-            msg = _('"action" must be specified in options; allowed values are '
-                    '%s') % ', '.join(self.list_actions())
-            raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg})
-        except RegistryException:
-            msg = _('allowed values for "action" are %s') % ', '.join(self.list_actions())
-            raise ValidationError(schemacfg.eid, {rn('options', 'subject'): msg})
-        if not checkonly:
-            if role == 'subject':
-                etype = schemacfg.schema.stype.name
-                ttype = schemacfg.schema.otype.name
-            else:
-                etype = schemacfg.schema.otype.name
-                ttype = schemacfg.schema.stype.name
-            etyperules = self.source.mapping.setdefault(etype, {})
-            etyperules.setdefault((rtype, role, action), []).append(
-                (ttype, options))
-            self.source.mapping_idx[schemacfg.eid] = (
-                etype, rtype, role, action, ttype)
-
-    def del_schema_config(self, schemacfg, checkonly=False):
-        """deleted CWSourceSchemaConfig, modify mapping accordingly"""
-        etype, rtype, role, action, ttype = self.source.mapping_idx[schemacfg.eid]
-        rules = self.source.mapping[etype][(rtype, role, action)]
-        rules = [x for x in rules if not x[0] == ttype]
-        if not rules:
-            del self.source.mapping[etype][(rtype, role, action)]
-
-    # import handling ##########################################################
-
-    def process(self, url, raise_on_error=False):
-        """IDataFeedParser main entry point"""
-        if url.startswith('http'): # XXX similar loose test as in parse of sources.datafeed
-            url = self.complete_url(url)
-        super(CWEntityXMLParser, self).process(url, raise_on_error)
-
-    def parse_etree(self, parent):
-        """Overriden from :class:`DataFeedXMLParser` to use a builder component."""
-        for node in list(parent):
-            builder = self._cw.vreg['components'].select(
-                'cw.entityxml.item-builder', self._cw, node=node,
-                parser=self)
-            yield builder.build_item()
-
-    def process_item(self, item, rels, raise_on_error=False):
-        """
-        item and rels are what's returned by the item builder `build_item` method:
-
-        * `item` is an {attribute: value} dictionary
-        * `rels` is for relations and structured as
-           {role: {relation: [(related item, related rels)...]}
-        """
-        entity = self.extid2entity(item['cwuri'].encode('ascii'), item['cwtype'],
-                                   cwsource=item['cwsource'], item=item,
-                                   raise_on_error=raise_on_error)
-        if entity is None:
-            return None
-        if entity.eid in self._processed_entities:
-            return entity
-        self._processed_entities.add(entity.eid)
-        if not (self.created_during_pull(entity) or self.updated_during_pull(entity)):
-            attrs = extract_typed_attrs(entity.e_schema, item)
-            self.update_if_necessary(entity, attrs)
-        self.process_relations(entity, rels)
-        return entity
-
-    def process_relations(self, entity, rels):
-        etype = entity.cw_etype
-        for (rtype, role, action), rules in self.source.mapping.get(etype, {}).items():
-            try:
-                related_items = rels[role][rtype]
-            except KeyError:
-                self.import_log.record_error('relation %s-%s not found in xml export of %s'
-                                             % (rtype, role, etype))
-                continue
-            try:
-                linker = self.select_linker(action, rtype, role, entity)
-            except RegistryException:
-                self.import_log.record_error('no linker for action %s' % action)
-            else:
-                linker.link_items(related_items, rules)
-
-    def before_entity_copy(self, entity, sourceparams):
-        """IDataFeedParser callback"""
-        attrs = extract_typed_attrs(entity.e_schema, sourceparams['item'])
-        entity.cw_edited.update(attrs)
-
-    def normalize_url(self, url):
-        """overridden to add vid=xml if vid is not set in the qs"""
-        url = super(CWEntityXMLParser, self).normalize_url(url)
-        purl = urlparse(url)
-        if purl.scheme in ('http', 'https'):
-            params = parse_qs(purl.query)
-            if 'vid' not in params:
-                params['vid'] = ['xml']
-                purl = list(purl)
-                purl[4] = urlencode(params, doseq=True)
-                return urlunparse(purl)
-        return url
-
-    def complete_url(self, url, etype=None, known_relations=None):
-        """append to the url's query string information about relation that should
-        be included in the resulting xml, according to source mapping.
-
-        If etype is not specified, try to guess it using the last path part of
-        the url, i.e. the format used by default in cubicweb to map all entities
-        of a given type as in 'http://mysite.org/EntityType'.
-
-        If `known_relations` is given, it should be a dictionary of already
-        known relations, so they don't get queried again.
-        """
-        purl = urlparse(url)
-        params = parse_qs(purl.query)
-        if etype is None:
-            etype = purl.path.split('/')[-1]
-        try:
-            etype = self._cw.vreg.case_insensitive_etypes[etype.lower()]
-        except KeyError:
-            return url
-        relations = params['relation'] = set(params.get('relation', ()))
-        for rtype, role, _ in self.source.mapping.get(etype, ()):
-            if known_relations and rtype in known_relations.get('role', ()):
-                continue
-            relations.add('%s-%s' % (rtype, role))
-        purl = list(purl)
-        purl[4] = urlencode(params, doseq=True)
-        return urlunparse(purl)
-
-    def complete_item(self, item, rels):
-        try:
-            return self._parsed_urls[item['cwuri']]
-        except KeyError:
-            itemurl = self.complete_url(item['cwuri'], item['cwtype'], rels)
-            item_rels = list(self.parse(itemurl))
-            assert len(item_rels) == 1, 'url %s expected to bring back one '\
-                'and only one entity, got %s' % (itemurl, len(item_rels))
-            self._parsed_urls[item['cwuri']] = item_rels[0]
-            if rels:
-                # XXX (do it better) merge relations
-                new_rels = item_rels[0][1]
-                new_rels.get('subject', {}).update(rels.get('subject', {}))
-                new_rels.get('object', {}).update(rels.get('object', {}))
-            return item_rels[0]
-
-
-class CWEntityXMLItemBuilder(Component):
-    __regid__ = 'cw.entityxml.item-builder'
-
-    def __init__(self, _cw, parser, node, **kwargs):
-        super(CWEntityXMLItemBuilder, self).__init__(_cw, **kwargs)
-        self.parser = parser
-        self.node = node
-
-    def build_item(self):
-        """parse a XML document node and return two dictionaries defining (part
-        of) an entity:
-
-        - {attribute: value}
-        - {role: {relation: [(related item, related rels)...]}
-        """
-        node = self.node
-        item = dict(node.attrib.items())
-        item['cwtype'] = text_type(node.tag)
-        item.setdefault('cwsource', None)
-        item['eid'] = int(item['eid'])
-        rels = {}
-        for child in node:
-            role = child.get('role')
-            if role:
-                # relation
-                related = rels.setdefault(role, {}).setdefault(child.tag, [])
-                related += self.parser.parse_etree(child)
-            elif child.text:
-                # attribute
-                item[child.tag] = text_type(child.text)
-            else:
-                # None attribute (empty tag)
-                item[child.tag] = None
-        return item, rels
-
-
-class CWEntityXMLActionCopy(Component):
-    """implementation of cubicweb entity xml parser's'copy' action
-
-    Takes no option.
-    """
-    __regid__ = 'cw.entityxml.action.copy'
-
-    def __init__(self, _cw, parser, rtype, role, entity=None, **kwargs):
-        super(CWEntityXMLActionCopy, self).__init__(_cw, **kwargs)
-        self.parser = parser
-        self.rtype = rtype
-        self.role = role
-        self.entity = entity
-
-    @classproperty
-    def action(cls):
-        return cls.__regid__.rsplit('.', 1)[-1]
-
-    def check_options(self, options, eid):
-        self._check_no_options(options, eid)
-
-    def _check_no_options(self, options, eid, msg=None):
-        if options:
-            if msg is None:
-                msg = self._cw._("'%s' action doesn't take any options") % self.action
-            raise ValidationError(eid, {rn('options', 'subject'): msg})
-
-    def link_items(self, others, rules):
-        assert not any(x[1] for x in rules), "'copy' action takes no option"
-        ttypes = frozenset([x[0] for x in rules])
-        eids = [] # local eids
-        for item, rels in others:
-            if item['cwtype'] in ttypes:
-                item, rels = self.parser.complete_item(item, rels)
-                other_entity = self.parser.process_item(item, rels)
-                if other_entity is not None:
-                    eids.append(other_entity.eid)
-        if eids:
-            self._set_relation(eids)
-        else:
-            self._clear_relation(ttypes)
-
-    def _clear_relation(self, ttypes):
-        if not self.parser.created_during_pull(self.entity):
-            if len(ttypes) > 1:
-                typerestr = ', Y is IN(%s)' % ','.join(ttypes)
-            else:
-                typerestr = ', Y is %s' % ','.join(ttypes)
-            self._cw.execute('DELETE ' + rtype_role_rql(self.rtype, self.role) + typerestr,
-                             {'x': self.entity.eid})
-
-    def _set_relation(self, eids):
-        assert eids
-        rtype = self.rtype
-        rqlbase = rtype_role_rql(rtype, self.role)
-        eidstr = ','.join(str(eid) for eid in eids)
-        self._cw.execute('DELETE %s, NOT Y eid IN (%s)' % (rqlbase, eidstr),
-                         {'x': self.entity.eid})
-        if self.role == 'object':
-            rql = 'SET %s, Y eid IN (%s), NOT Y %s X' % (rqlbase, eidstr, rtype)
-        else:
-            rql = 'SET %s, Y eid IN (%s), NOT X %s Y' % (rqlbase, eidstr, rtype)
-        self._cw.execute(rql, {'x': self.entity.eid})
-
-
-class CWEntityXMLActionLink(CWEntityXMLActionCopy):
-    """implementation of cubicweb entity xml parser's'link' action
-
-    requires a 'linkattr' option to control search of the linked entity.
-    """
-    __regid__ = 'cw.entityxml.action.link'
-
-    def check_options(self, options, eid):
-        if 'linkattr' not in options:
-            msg = self._cw._("'%s' action requires 'linkattr' option") % self.action
-            raise ValidationError(eid, {rn('options', 'subject'): msg})
-
-    create_when_not_found = False
-
-    def link_items(self, others, rules):
-        for ttype, options in rules:
-            searchattrs = splitstrip(options.get('linkattr', ''))
-            self._related_link(ttype, others, searchattrs)
-
-    def _related_link(self, ttype, others, searchattrs):
-        def issubset(x, y):
-            return all(z in y for z in x)
-        eids = [] # local eids
-        log = self.parser.import_log
-        for item, rels in others:
-            if item['cwtype'] != ttype:
-                continue
-            if not issubset(searchattrs, item):
-                item, rels = self.parser.complete_item(item, rels)
-                if not issubset(searchattrs, item):
-                    log.record_error('missing attribute, got %s expected keys %s'
-                                     % (item, searchattrs))
-                    continue
-            # XXX str() needed with python < 2.6
-            kwargs = dict((str(attr), item[attr]) for attr in searchattrs)
-            targets = self._find_entities(item, kwargs)
-            if len(targets) == 1:
-                entity = targets[0]
-            elif not targets and self.create_when_not_found:
-                entity = self._cw.create_entity(item['cwtype'], **kwargs)
-            else:
-                if len(targets) > 1:
-                    log.record_error('ambiguous link: found %s entity %s with attributes %s'
-                                     % (len(targets), item['cwtype'], kwargs))
-                else:
-                    log.record_error('can not find %s entity with attributes %s'
-                                     % (item['cwtype'], kwargs))
-                continue
-            eids.append(entity.eid)
-            self.parser.process_relations(entity, rels)
-        if eids:
-            self._set_relation(eids)
-        else:
-            self._clear_relation((ttype,))
-
-    def _find_entities(self, item, kwargs):
-        return tuple(self._cw.find(item['cwtype'], **kwargs).entities())
-
-
-class CWEntityXMLActionLinkInState(CWEntityXMLActionLink):
-    """custom implementation of cubicweb entity xml parser's'link' action for
-    in_state relation
-    """
-    __select__ = match_rtype('in_state')
-
-    def check_options(self, options, eid):
-        super(CWEntityXMLActionLinkInState, self).check_options(options, eid)
-        if 'name' not in options['linkattr']:
-            msg = self._cw._("'%s' action for in_state relation should at least "
-                             "have 'linkattr=name' option") % self.action
-            raise ValidationError(eid, {rn('options', 'subject'): msg})
-
-    def _find_entities(self, item, kwargs):
-        assert 'name' in item # XXX else, complete_item
-        state_name = item['name']
-        wf = self.entity.cw_adapt_to('IWorkflowable').current_workflow
-        state = wf.state_by_name(state_name)
-        if state is None:
-            return ()
-        return (state,)
-
-
-class CWEntityXMLActionLinkOrCreate(CWEntityXMLActionLink):
-    """implementation of cubicweb entity xml parser's'link-or-create' action
-
-    requires a 'linkattr' option to control search of the linked entity.
-    """
-    __regid__ = 'cw.entityxml.action.link-or-create'
-    create_when_not_found = True
--- a/cubicweb/sobjects/services.py	Fri Sep 30 17:36:02 2016 +0200
+++ b/cubicweb/sobjects/services.py	Fri Sep 30 17:36:40 2016 +0200
@@ -52,7 +52,6 @@
             results['%s_cache_miss' % title] = misses
             results['%s_cache_hit_percent' % title] = (hits * 100) / (hits + misses)
         results['type_source_cache_size'] = len(repo._type_source_cache)
-        results['extid_cache_size'] = len(repo._extid_cache)
         results['sql_no_cache'] = repo.system_source.no_cache
         results['nb_open_sessions'] = len(repo._sessions)
         results['nb_active_threads'] = threading.activeCount()
--- a/cubicweb/sobjects/test/unittest_cwxmlparser.py	Fri Sep 30 17:36:02 2016 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,338 +0,0 @@
-# copyright 2011-2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
-# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
-#
-# This file is part of CubicWeb.
-#
-# CubicWeb is free software: you can redistribute it and/or modify it under the
-# terms of the GNU Lesser General Public License as published by the Free
-# Software Foundation, either version 2.1 of the License, or (at your option)
-# any later version.
-#
-# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
-# details.
-#
-# You should have received a copy of the GNU Lesser General Public License along
-# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
-
-from datetime import datetime
-
-from six.moves.urllib.parse import urlsplit, parse_qsl
-
-import pytz
-from cubicweb.devtools.testlib import CubicWebTC
-from cubicweb.sobjects.cwxmlparser import CWEntityXMLParser
-
-orig_parse = CWEntityXMLParser.parse
-
-def parse(self, url):
-    try:
-        url = RELATEDXML[url.split('?')[0]]
-    except KeyError:
-        pass
-    return orig_parse(self, url)
-
-def setUpModule():
-    CWEntityXMLParser.parse = parse
-
-def tearDownModule():
-    CWEntityXMLParser.parse = orig_parse
-
-
-BASEXML = ''.join(u'''
-<rset size="1">
- <CWUser eid="5" cwuri="http://pouet.org/5" cwsource="system">
-  <login>sthenault</login>
-  <upassword>toto</upassword>
-  <last_login_time>2011-01-25 14:14:06</last_login_time>
-  <creation_date>2010-01-22 10:27:59</creation_date>
-  <modification_date>2011-01-25 14:14:06</modification_date>
-  <use_email role="subject">
-    <EmailAddress cwuri="http://pouet.org/6" eid="6"/>
-  </use_email>
-  <in_group role="subject">
-    <CWGroup cwuri="http://pouet.org/7" eid="7"/>
-    <CWGroup cwuri="http://pouet.org/8" eid="8"/>
-  </in_group>
-  <tags role="object">
-    <Tag cwuri="http://pouet.org/9" eid="9"/>
-    <Tag cwuri="http://pouet.org/10" eid="10"/>
-  </tags>
-  <in_state role="subject">
-    <State cwuri="http://pouet.org/11" eid="11" name="activated"/>
-  </in_state>
- </CWUser>
-</rset>
-'''.splitlines())
-
-RELATEDXML = {
-    'http://pouet.org/6': u'''
-<rset size="1">
- <EmailAddress eid="6" cwuri="http://pouet.org/6">
-  <address>syt@logilab.fr</address>
-  <modification_date>2010-04-13 14:35:56</modification_date>
-  <creation_date>2010-04-13 14:35:56</creation_date>
-  <tags role="object">
-    <Tag cwuri="http://pouet.org/9" eid="9"/>
-  </tags>
- </EmailAddress>
-</rset>
-''',
-    'http://pouet.org/7': u'''
-<rset size="1">
- <CWGroup eid="7" cwuri="http://pouet.org/7">
-  <name>users</name>
-  <tags role="object">
-    <Tag cwuri="http://pouet.org/9" eid="9"/>
-  </tags>
- </CWGroup>
-</rset>
-''',
-    'http://pouet.org/8': u'''
-<rset size="1">
- <CWGroup eid="8" cwuri="http://pouet.org/8">
-  <name>unknown</name>
- </CWGroup>
-</rset>
-''',
-    'http://pouet.org/9': u'''
-<rset size="1">
- <Tag eid="9" cwuri="http://pouet.org/9">
-  <name>hop</name>
- </Tag>
-</rset>
-''',
-    'http://pouet.org/10': u'''
-<rset size="1">
- <Tag eid="10" cwuri="http://pouet.org/10">
-  <name>unknown</name>
- </Tag>
-</rset>
-''',
-    }
-
-
-OTHERXML = ''.join(u'''
-<rset size="1">
- <CWUser eid="5" cwuri="http://pouet.org/5" cwsource="myfeed">
-  <login>sthenault</login>
-  <upassword>toto</upassword>
-  <last_login_time>2011-01-25 14:14:06</last_login_time>
-  <creation_date>2010-01-22 10:27:59</creation_date>
-  <modification_date>2011-01-25 14:14:06</modification_date>
-  <in_group role="subject">
-    <CWGroup cwuri="http://pouet.org/7" eid="7"/>
-  </in_group>
- </CWUser>
-</rset>
-'''.splitlines()
-)
-
-
-class CWEntityXMLParserTC(CubicWebTC):
-    """/!\ this test use a pre-setup database /!\, if you modify above xml,
-    REMOVE THE DATABASE TEMPLATE else it won't be considered
-    """
-    test_db_id = 'xmlparser'
-
-    def assertURLEquiv(self, first, second):
-        # ignore ordering differences in query params
-        parsed_first = urlsplit(first)
-        parsed_second = urlsplit(second)
-        self.assertEqual(parsed_first.scheme, parsed_second.scheme)
-        self.assertEqual(parsed_first.netloc, parsed_second.netloc)
-        self.assertEqual(parsed_first.path, parsed_second.path)
-        self.assertEqual(parsed_first.fragment, parsed_second.fragment)
-        self.assertCountEqual(parse_qsl(parsed_first.query), parse_qsl(parsed_second.query))
-
-    @classmethod
-    def pre_setup_database(cls, cnx, config):
-        myfeed = cnx.create_entity('CWSource', name=u'myfeed', type=u'datafeed',
-                                   parser=u'cw.entityxml', url=BASEXML)
-        myotherfeed = cnx.create_entity('CWSource', name=u'myotherfeed', type=u'datafeed',
-                                        parser=u'cw.entityxml', url=OTHERXML)
-        cnx.commit()
-        myfeed.init_mapping([(('CWUser', 'use_email', '*'),
-                              u'role=subject\naction=copy'),
-                             (('CWUser', 'in_group', '*'),
-                              u'role=subject\naction=link\nlinkattr=name'),
-                             (('CWUser', 'in_state', '*'),
-                              u'role=subject\naction=link\nlinkattr=name'),
-                             (('*', 'tags', '*'),
-                              u'role=object\naction=link-or-create\nlinkattr=name'),
-                            ])
-        myotherfeed.init_mapping([(('CWUser', 'in_group', '*'),
-                                   u'role=subject\naction=link\nlinkattr=name'),
-                                  (('CWUser', 'in_state', '*'),
-                                   u'role=subject\naction=link\nlinkattr=name'),
-                                  ])
-        cnx.create_entity('Tag', name=u'hop')
-        cnx.commit()
-
-    def test_complete_url(self):
-        dfsource = self.repo.sources_by_uri['myfeed']
-        with self.admin_access.repo_cnx() as cnx:
-            parser = dfsource._get_parser(cnx)
-            self.assertURLEquiv(parser.complete_url('http://www.cubicweb.org/CWUser'),
-                                'http://www.cubicweb.org/CWUser?relation=tags-object&relation=in_group-subject&relation=in_state-subject&relation=use_email-subject')
-            self.assertURLEquiv(parser.complete_url('http://www.cubicweb.org/cwuser'),
-                                'http://www.cubicweb.org/cwuser?relation=tags-object&relation=in_group-subject&relation=in_state-subject&relation=use_email-subject')
-            self.assertURLEquiv(parser.complete_url('http://www.cubicweb.org/cwuser?vid=rdf&relation=hop'),
-                                'http://www.cubicweb.org/cwuser?relation=hop&relation=tags-object&relation=in_group-subject&relation=in_state-subject&relation=use_email-subject&vid=rdf')
-            self.assertURLEquiv(parser.complete_url('http://www.cubicweb.org/?rql=cwuser&vid=rdf&relation=hop'),
-                                'http://www.cubicweb.org/?rql=cwuser&relation=hop&vid=rdf')
-            self.assertURLEquiv(parser.complete_url('http://www.cubicweb.org/?rql=cwuser&relation=hop'),
-                                'http://www.cubicweb.org/?rql=cwuser&relation=hop')
-
-
-    def test_actions(self):
-        dfsource = self.repo.sources_by_uri['myfeed']
-        self.assertEqual(dfsource.mapping,
-                         {u'CWUser': {
-                             (u'in_group', u'subject', u'link'): [
-                                 (u'CWGroup', {u'linkattr': u'name'})],
-                             (u'in_state', u'subject', u'link'): [
-                                 (u'State', {u'linkattr': u'name'})],
-                             (u'tags', u'object', u'link-or-create'): [
-                                 (u'Tag', {u'linkattr': u'name'})],
-                             (u'use_email', u'subject', u'copy'): [
-                                 (u'EmailAddress', {})]
-                             },
-                          u'CWGroup': {
-                             (u'tags', u'object', u'link-or-create'): [
-                                 (u'Tag', {u'linkattr': u'name'})],
-                             },
-                          u'EmailAddress': {
-                             (u'tags', u'object', u'link-or-create'): [
-                                 (u'Tag', {u'linkattr': u'name'})],
-                             },
-                          })
-        with self.repo.internal_cnx() as cnx:
-            stats = dfsource.pull_data(cnx, force=True, raise_on_error=True)
-            self.assertEqual(sorted(stats), ['checked', 'created', 'updated'])
-            self.assertEqual(len(stats['created']), 2)
-            self.assertEqual(stats['updated'], set())
-
-        with self.admin_access.web_request() as req:
-            user = req.execute('CWUser X WHERE X login "sthenault"').get_entity(0, 0)
-            self.assertEqual(user.creation_date, datetime(2010, 1, 22, 10, 27, 59, tzinfo=pytz.utc))
-            self.assertEqual(user.modification_date, datetime(2011, 1, 25, 14, 14, 6, tzinfo=pytz.utc))
-            self.assertEqual(user.cwuri, 'http://pouet.org/5')
-            self.assertEqual(user.cw_source[0].name, 'myfeed')
-            self.assertEqual(user.absolute_url(), 'http://pouet.org/5')
-            self.assertEqual(len(user.use_email), 1)
-            # copy action
-            email = user.use_email[0]
-            self.assertEqual(email.address, 'syt@logilab.fr')
-            self.assertEqual(email.cwuri, 'http://pouet.org/6')
-            self.assertEqual(email.absolute_url(), 'http://pouet.org/6')
-            self.assertEqual(email.cw_source[0].name, 'myfeed')
-            self.assertEqual(len(email.reverse_tags), 1)
-            self.assertEqual(email.reverse_tags[0].name, 'hop')
-            # link action
-            self.assertFalse(req.execute('CWGroup X WHERE X name "unknown"'))
-            groups = sorted([g.name for g in user.in_group])
-            self.assertEqual(groups, ['users'])
-            group = user.in_group[0]
-            self.assertEqual(len(group.reverse_tags), 1)
-            self.assertEqual(group.reverse_tags[0].name, 'hop')
-            # link or create action
-            tags = set([(t.name, t.cwuri.replace(str(t.eid), ''), t.cw_source[0].name)
-                        for t in user.reverse_tags])
-            self.assertEqual(tags, set((('hop', 'http://testing.fr/cubicweb/', 'system'),
-                                        ('unknown', 'http://testing.fr/cubicweb/', 'system')))
-                             )
-        with self.repo.internal_cnx() as cnx:
-            stats = dfsource.pull_data(cnx, force=True, raise_on_error=True)
-            self.assertEqual(stats['created'], set())
-            self.assertEqual(len(stats['updated']), 0)
-            self.assertEqual(len(stats['checked']), 2)
-            self.repo._type_source_cache.clear()
-            self.repo._extid_cache.clear()
-            stats = dfsource.pull_data(cnx, force=True, raise_on_error=True)
-            self.assertEqual(stats['created'], set())
-            self.assertEqual(len(stats['updated']), 0)
-            self.assertEqual(len(stats['checked']), 2)
-
-            # test move to system source
-            cnx.execute('SET X cw_source S WHERE X eid %(x)s, S name "system"', {'x': email.eid})
-            cnx.commit()
-            rset = cnx.execute('EmailAddress X WHERE X address "syt@logilab.fr"')
-            self.assertEqual(len(rset), 1)
-            e = rset.get_entity(0, 0)
-            self.assertEqual(e.eid, email.eid)
-            self.assertEqual(e.cw_metainformation(), {'source': {'type': u'native', 'uri': u'system',
-                                                                 'use-cwuri-as-url': False},
-                                                      'type': 'EmailAddress',
-                                                      'extid': None})
-            self.assertEqual(e.cw_source[0].name, 'system')
-            self.assertEqual(e.reverse_use_email[0].login, 'sthenault')
-            # test everything is still fine after source synchronization
-            # clear caches to make sure we look at the moved_entities table
-            self.repo._type_source_cache.clear()
-            self.repo._extid_cache.clear()
-            stats = dfsource.pull_data(cnx, force=True, raise_on_error=True)
-            self.assertEqual(stats['updated'], set((email.eid,)))
-            rset = cnx.execute('EmailAddress X WHERE X address "syt@logilab.fr"')
-            self.assertEqual(len(rset), 1)
-            e = rset.get_entity(0, 0)
-            self.assertEqual(e.eid, email.eid)
-            self.assertEqual(e.cw_metainformation(), {'source': {'type': u'native', 'uri': u'system',
-                                                                 'use-cwuri-as-url': False},
-                                                      'type': 'EmailAddress',
-                                                      'extid': None})
-            self.assertEqual(e.cw_source[0].name, 'system')
-            self.assertEqual(e.reverse_use_email[0].login, 'sthenault')
-            cnx.commit()
-
-            # test delete entity
-            e.cw_delete()
-            cnx.commit()
-            # test everything is still fine after source synchronization
-            stats = dfsource.pull_data(cnx, force=True, raise_on_error=True)
-            rset = cnx.execute('EmailAddress X WHERE X address "syt@logilab.fr"')
-            self.assertEqual(len(rset), 0)
-            rset = cnx.execute('Any X WHERE X use_email E, X login "sthenault"')
-            self.assertEqual(len(rset), 0)
-
-    def test_external_entity(self):
-        dfsource = self.repo.sources_by_uri['myotherfeed']
-        with self.repo.internal_cnx() as cnx:
-            stats = dfsource.pull_data(cnx, force=True, raise_on_error=True)
-            user = cnx.execute('CWUser X WHERE X login "sthenault"').get_entity(0, 0)
-            self.assertEqual(user.creation_date, datetime(2010, 1, 22, 10, 27, 59, tzinfo=pytz.utc))
-            self.assertEqual(user.modification_date, datetime(2011, 1, 25, 14, 14, 6, tzinfo=pytz.utc))
-            self.assertEqual(user.cwuri, 'http://pouet.org/5')
-            self.assertEqual(user.cw_source[0].name, 'myfeed')
-
-    def test_noerror_missing_fti_attribute(self):
-        dfsource = self.repo.sources_by_uri['myfeed']
-        with self.repo.internal_cnx() as cnx:
-            parser = dfsource._get_parser(cnx)
-            parser.process_urls(['''
-<rset size="1">
- <Card eid="50" cwuri="http://pouet.org/50" cwsource="system">
-  <title>how-to</title>
- </Card>
-</rset>
-'''], raise_on_error=True)
-
-    def test_noerror_unspecified_date(self):
-        dfsource = self.repo.sources_by_uri['myfeed']
-        with self.repo.internal_cnx() as cnx:
-            parser = dfsource._get_parser(cnx)
-            parser.process_urls(['''
-<rset size="1">
- <Card eid="50" cwuri="http://pouet.org/50" cwsource="system">
-  <title>how-to</title>
-  <content>how-to</content>
-  <synopsis>how-to</synopsis>
-  <creation_date/>
- </Card>
-</rset>
-'''], raise_on_error=True)
-
-if __name__ == '__main__':
-    from logilab.common.testlib import unittest_main
-    unittest_main()
--- a/cubicweb/web/test/unittest_views_cwsources.py	Fri Sep 30 17:36:02 2016 +0200
+++ b/cubicweb/web/test/unittest_views_cwsources.py	Fri Sep 30 17:36:40 2016 +0200
@@ -1,13 +1,23 @@
 from logilab.common import tempattr
 from cubicweb.devtools.testlib import CubicWebTC
+from cubicweb.server.sources import datafeed
 
 
 class SynchronizeSourceTC(CubicWebTC):
+
     def test_synchronize_view(self):
         with self.admin_access.web_request(vid='cw.source-sync') as req:
-            source = req.create_entity('CWSource', name=u'ext', type=u'datafeed',
-                                       parser=u'cw.entityxml')
-            req.cnx.commit()
+
+            class AParser(datafeed.DataFeedParser):
+                __regid__ = 'testparser'
+
+                def process(self, url, raise_on_error=False):
+                    pass
+
+            with self.temporary_appobjects(AParser):
+                source = req.create_entity('CWSource', name=u'ext', type=u'datafeed',
+                                           parser=u'cw.entityxml')
+                req.cnx.commit()
 
             self.threads = 0
 
--- a/cubicweb/web/views/debug.py	Fri Sep 30 17:36:02 2016 +0200
+++ b/cubicweb/web/views/debug.py	Fri Sep 30 17:36:40 2016 +0200
@@ -94,7 +94,7 @@
         stats['looping_tasks'] = ', '.join('%s (%s seconds)' % (n, i) for n, i in stats['looping_tasks'])
         stats['threads'] = ', '.join(sorted(stats['threads']))
         for k in stats:
-            if k in ('extid_cache_size', 'type_source_cache_size'):
+            if k == 'type_source_cache_size':
                 continue
             if k.endswith('_cache_size'):
                 stats[k] = '%s / %s' % (stats[k]['size'], stats[k]['maxsize'])