cubicweb/server/test/unittest_datafeed.py
author Denis Laxalde <denis.laxalde@logilab.fr>
Wed, 28 Sep 2016 11:06:28 +0200
changeset 11740 dabbb2a4a493
parent 11252 6b1d09ef0c45
child 11756 60fed6272771
permissions -rw-r--r--
[datafeed] Complete the import log even if parser could not be found

It happens that if the parser could not be found, _pull_data() would just return an empty dict without taking care to close the import log which it just opened. This leads to misleading information in the user interface where CWDataImport entities kept accumulating in the "imports" tab of CWSource primary view without anything else happening.

So:
* log an error message when parser cannot be found
* always close (write logs and set "end_timestamp" attribute) import log when leaving _pull_data().

Closes #15505460.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
10832
2350424585f8 [server] unormalize source logger's name to avoid encoding errors
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10805
diff changeset
     1
# coding: utf-8
9798
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
     2
# copyright 2011-2014 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
6957
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     3
# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     4
#
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     5
# This file is part of CubicWeb.
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     6
#
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     7
# CubicWeb is free software: you can redistribute it and/or modify it under the
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     8
# terms of the GNU Lesser General Public License as published by the Free
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
     9
# Software Foundation, either version 2.1 of the License, or (at your option)
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    10
# any later version.
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    11
#
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    12
# CubicWeb is distributed in the hope that it will be useful, but WITHOUT
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    13
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    14
# FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    15
# details.
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    16
#
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    17
# You should have received a copy of the GNU Lesser General Public License along
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    18
# with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    19
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    20
from datetime import timedelta
9824
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
    21
from contextlib import contextmanager
6957
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    22
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    23
from cubicweb.devtools.testlib import CubicWebTC
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    24
from cubicweb.server.sources import datafeed
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    25
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    26
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    27
class DataFeedTC(CubicWebTC):
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    28
    def setup_database(self):
9798
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    29
        with self.admin_access.repo_cnx() as cnx:
9824
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
    30
            with self.base_parser(cnx):
10832
2350424585f8 [server] unormalize source logger's name to avoid encoding errors
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10805
diff changeset
    31
                cnx.create_entity('CWSource', name=u'ô myfeed', type=u'datafeed',
9824
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
    32
                                  parser=u'testparser', url=u'ignored',
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
    33
                                  config=u'synchronization-interval=1min')
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
    34
                cnx.commit()
6957
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    35
9824
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
    36
    @contextmanager
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
    37
    def base_parser(self, session):
6957
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    38
        class AParser(datafeed.DataFeedParser):
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    39
            __regid__ = 'testparser'
7386
206890413858 [repo test] datafeed parser's process method should now take raise_on_error argument
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 6957
diff changeset
    40
            def process(self, url, raise_on_error=False):
6957
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    41
                entity = self.extid2entity('http://www.cubicweb.org/', 'Card',
7978
b6150210f692 [test] fix datafeed test, we have to use an internal session
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7735
diff changeset
    42
                                           item={'title': u'cubicweb.org',
10089
6346f53c85f1 [datafeed] Add a raise_on_error parameter to DataFeedSource.extid2entity
Denis Laxalde <denis.laxalde@logilab.fr>
parents: 9897
diff changeset
    43
                                                 'content': u'the cw web site'},
6346f53c85f1 [datafeed] Add a raise_on_error parameter to DataFeedSource.extid2entity
Denis Laxalde <denis.laxalde@logilab.fr>
parents: 9897
diff changeset
    44
                                           raise_on_error=raise_on_error)
6957
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    45
                if not self.created_during_pull(entity):
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    46
                    self.notify_updated(entity)
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    47
            def before_entity_copy(self, entity, sourceparams):
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    48
                entity.cw_edited.update(sourceparams['item'])
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    49
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    50
        with self.temporary_appobjects(AParser):
10832
2350424585f8 [server] unormalize source logger's name to avoid encoding errors
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10805
diff changeset
    51
            if u'ô myfeed' in self.repo.sources_by_uri:
2350424585f8 [server] unormalize source logger's name to avoid encoding errors
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10805
diff changeset
    52
                yield self.repo.sources_by_uri[u'ô myfeed']._get_parser(session)
9824
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
    53
            else:
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
    54
                yield
11740
dabbb2a4a493 [datafeed] Complete the import log even if parser could not be found
Denis Laxalde <denis.laxalde@logilab.fr>
parents: 11252
diff changeset
    55
        # vreg.unregister just pops appobjects from their regid entry,
dabbb2a4a493 [datafeed] Complete the import log even if parser could not be found
Denis Laxalde <denis.laxalde@logilab.fr>
parents: 11252
diff changeset
    56
        # completely remove the entry to ensure we have no side effect with
dabbb2a4a493 [datafeed] Complete the import log even if parser could not be found
Denis Laxalde <denis.laxalde@logilab.fr>
parents: 11252
diff changeset
    57
        # this empty entry.
dabbb2a4a493 [datafeed] Complete the import log even if parser could not be found
Denis Laxalde <denis.laxalde@logilab.fr>
parents: 11252
diff changeset
    58
        del self.vreg['parsers'][AParser.__regid__]
6957
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    59
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    60
    def test(self):
10832
2350424585f8 [server] unormalize source logger's name to avoid encoding errors
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10805
diff changeset
    61
        self.assertIn(u'ô myfeed', self.repo.sources_by_uri)
2350424585f8 [server] unormalize source logger's name to avoid encoding errors
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10805
diff changeset
    62
        dfsource = self.repo.sources_by_uri[u'ô myfeed']
9822
4a118bfd6ab4 [datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9798
diff changeset
    63
        self.assertNotIn('use_cwuri_as_url', dfsource.__dict__)
10832
2350424585f8 [server] unormalize source logger's name to avoid encoding errors
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10805
diff changeset
    64
        self.assertEqual({'type': u'datafeed', 'uri': u'ô myfeed', 'use-cwuri-as-url': True},
9822
4a118bfd6ab4 [datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9798
diff changeset
    65
                         dfsource.public_config)
4a118bfd6ab4 [datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9798
diff changeset
    66
        self.assertEqual(dfsource.use_cwuri_as_url, True)
6957
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    67
        self.assertEqual(dfsource.latest_retrieval, None)
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    68
        self.assertEqual(dfsource.synchro_interval, timedelta(seconds=60))
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    69
        self.assertFalse(dfsource.fresh())
10832
2350424585f8 [server] unormalize source logger's name to avoid encoding errors
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10805
diff changeset
    70
        # ensure source's logger name has been unormalized (non-ASCII 'ô' replaced by 'o')
2350424585f8 [server] unormalize source logger's name to avoid encoding errors
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10805
diff changeset
    71
        self.assertEqual(dfsource.info.__self__.name, 'cubicweb.sources.o myfeed')
6957
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
    72
9824
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
    73
        with self.repo.internal_cnx() as cnx:
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
    74
            with self.base_parser(cnx):
9798
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    75
                stats = dfsource.pull_data(cnx, force=True)
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    76
                cnx.commit()
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    77
                # test import stats
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    78
                self.assertEqual(sorted(stats), ['checked', 'created', 'updated'])
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    79
                self.assertEqual(len(stats['created']), 1)
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    80
                entity = cnx.execute('Card X').get_entity(0, 0)
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    81
                self.assertIn(entity.eid, stats['created'])
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    82
                self.assertEqual(stats['updated'], set())
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    83
                # test imported entities
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    84
                self.assertEqual(entity.title, 'cubicweb.org')
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    85
                self.assertEqual(entity.content, 'the cw web site')
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    86
                self.assertEqual(entity.cwuri, 'http://www.cubicweb.org/')
10832
2350424585f8 [server] unormalize source logger's name to avoid encoding errors
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10805
diff changeset
    87
                self.assertEqual(entity.cw_source[0].name, u'ô myfeed')
9798
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    88
                self.assertEqual(entity.cw_metainformation(),
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    89
                                 {'type': 'Card',
10832
2350424585f8 [server] unormalize source logger's name to avoid encoding errors
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10805
diff changeset
    90
                                  'source': {'uri': u'ô myfeed', 'type': 'datafeed', 'use-cwuri-as-url': True},
10774
0361442e2633 [server/test] use bytes for extid
Julien Cristau <julien.cristau@logilab.fr>
parents: 10089
diff changeset
    91
                                  'extid': b'http://www.cubicweb.org/'}
9798
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    92
                                 )
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    93
                self.assertEqual(entity.absolute_url(), 'http://www.cubicweb.org/')
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    94
                # test repo cache keys
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    95
                self.assertEqual(self.repo._type_source_cache[entity.eid],
10832
2350424585f8 [server] unormalize source logger's name to avoid encoding errors
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10805
diff changeset
    96
                                 ('Card', b'http://www.cubicweb.org/', u'ô myfeed'))
10774
0361442e2633 [server/test] use bytes for extid
Julien Cristau <julien.cristau@logilab.fr>
parents: 10089
diff changeset
    97
                self.assertEqual(self.repo._extid_cache[b'http://www.cubicweb.org/'],
9798
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    98
                                 entity.eid)
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
    99
                # test repull
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   100
                stats = dfsource.pull_data(cnx, force=True)
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   101
                self.assertEqual(stats['created'], set())
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   102
                self.assertEqual(stats['updated'], set((entity.eid,)))
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   103
                # test repull with caches reset
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   104
                self.repo._type_source_cache.clear()
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   105
                self.repo._extid_cache.clear()
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   106
                stats = dfsource.pull_data(cnx, force=True)
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   107
                self.assertEqual(stats['created'], set())
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   108
                self.assertEqual(stats['updated'], set((entity.eid,)))
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   109
                self.assertEqual(self.repo._type_source_cache[entity.eid],
10832
2350424585f8 [server] unormalize source logger's name to avoid encoding errors
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10805
diff changeset
   110
                                 ('Card', b'http://www.cubicweb.org/', u'ô myfeed'))
10774
0361442e2633 [server/test] use bytes for extid
Julien Cristau <julien.cristau@logilab.fr>
parents: 10089
diff changeset
   111
                self.assertEqual(self.repo._extid_cache[b'http://www.cubicweb.org/'],
9798
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   112
                                 entity.eid)
6957
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   113
11252
6b1d09ef0c45 [datafeed] rename parser.sourceuris to source_uris
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 11057
diff changeset
   114
                self.assertEqual(dfsource.source_uris(cnx),
10832
2350424585f8 [server] unormalize source logger's name to avoid encoding errors
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10805
diff changeset
   115
                                 {b'http://www.cubicweb.org/': (entity.eid, 'Card')})
9860
e24bf60428d3 [tests/datafeed] use the new connection api (a small leftover)
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9798
diff changeset
   116
                self.assertTrue(dfsource.latest_retrieval)
e24bf60428d3 [tests/datafeed] use the new connection api (a small leftover)
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9798
diff changeset
   117
                self.assertTrue(dfsource.fresh())
6957
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   118
7735
71adfd6bab38 [source] update system table and internal structures on source renaming. Closes #1896721
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7717
diff changeset
   119
        # test_rename_source
9798
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   120
        with self.admin_access.repo_cnx() as cnx:
10832
2350424585f8 [server] unormalize source logger's name to avoid encoding errors
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10805
diff changeset
   121
            cnx.entity_from_eid(dfsource.eid).cw_set(name=u"myrenamedfeed")
9798
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   122
            cnx.commit()
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   123
            entity = cnx.execute('Card X').get_entity(0, 0)
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   124
            self.assertEqual(entity.cwuri, 'http://www.cubicweb.org/')
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   125
            self.assertEqual(entity.cw_source[0].name, 'myrenamedfeed')
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   126
            self.assertEqual(entity.cw_metainformation(),
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   127
                             {'type': 'Card',
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   128
                              'source': {'uri': 'myrenamedfeed', 'type': 'datafeed', 'use-cwuri-as-url': True},
10774
0361442e2633 [server/test] use bytes for extid
Julien Cristau <julien.cristau@logilab.fr>
parents: 10089
diff changeset
   129
                              'extid': b'http://www.cubicweb.org/'}
9798
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   130
                             )
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   131
            self.assertEqual(self.repo._type_source_cache[entity.eid],
10774
0361442e2633 [server/test] use bytes for extid
Julien Cristau <julien.cristau@logilab.fr>
parents: 10089
diff changeset
   132
                             ('Card', b'http://www.cubicweb.org/', 'myrenamedfeed'))
0361442e2633 [server/test] use bytes for extid
Julien Cristau <julien.cristau@logilab.fr>
parents: 10089
diff changeset
   133
            self.assertEqual(self.repo._extid_cache[b'http://www.cubicweb.org/'],
9798
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   134
                             entity.eid)
7735
71adfd6bab38 [source] update system table and internal structures on source renaming. Closes #1896721
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7717
diff changeset
   135
9798
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   136
            # test_delete_source
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   137
            cnx.execute('DELETE CWSource S WHERE S name "myrenamedfeed"')
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   138
            cnx.commit()
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   139
            self.assertFalse(cnx.execute('Card X WHERE X title "cubicweb.org"'))
1c105cf42904 [tests/datafeed] use the new connection api
Aurelien Campeas <aurelien.campeas@logilab.fr>
parents: 9469
diff changeset
   140
            self.assertFalse(cnx.execute('Any X WHERE X has_text "cubicweb.org"'))
7650
278fe9c1f3ad [repo] closes #1821172: fix source deletion
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 7631
diff changeset
   141
9824
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
   142
    def test_parser_retrieve_url_local(self):
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
   143
        with self.admin_access.repo_cnx() as cnx:
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
   144
            with self.base_parser(cnx) as parser:
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
   145
                value = parser.retrieve_url('a string')
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
   146
                self.assertEqual(200, value.getcode())
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
   147
                self.assertEqual('a string', value.geturl())
30183ecf5c61 [datafeed parser] fix retrieve_url to always return urllib2.urlopen compatible output
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9822
diff changeset
   148
10956
208c9ac8edbb update repo sources on update of source's entities .url attribute
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10832
diff changeset
   149
    def test_update_url(self):
        """Check that changing the source entity's `url` updates `source.urls`.

        The repository source object must only expose the new urls (one per
        line of the `url` attribute) once the transaction is committed.
        """
        feed_source = self.repo.sources_by_uri[u'ô myfeed']
        with self.admin_access.repo_cnx() as cnx:
            source_entity = cnx.entity_from_eid(feed_source.eid)
            source_entity.cw_set(url=u"http://pouet.com\nhttp://pouet.org")
            # nothing committed yet: the source object must be left untouched
            self.assertEqual(feed_source.urls, [u'ignored'])
            cnx.commit()
        self.assertEqual(feed_source.urls, [u"http://pouet.com", u"http://pouet.org"])
208c9ac8edbb update repo sources on update of source's entities .url attribute
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 10832
diff changeset
   156
11740
dabbb2a4a493 [datafeed] Complete the import log even if parser could not be found
Denis Laxalde <denis.laxalde@logilab.fr>
parents: 11252
diff changeset
   157
    def test_parser_not_found(self):
        """Check `pull_data` behaviour when the parser cannot be loaded.

        Expected outcome: an ERROR record is emitted on the source's logger,
        the returned stats are empty, and the failure message ends up in the
        log of the CWDataImport entity.
        """
        feed_source = self.repo.sources_by_uri[u'ô myfeed']
        with self.assertLogs('cubicweb.sources.o myfeed', level='ERROR') as captured, \
                self.repo.internal_cnx() as cnx:
            pull_stats = feed_source.pull_data(cnx, force=True)
            # read the log while the connection is still open
            import_log_text = cnx.find('CWDataImport').one().log
        self.assertIn('failed to load parser for', captured.output[0])
        self.assertEqual(pull_stats, {})
        expected_message = u'failed to load parser for source &quot;ô myfeed&quot;'
        self.assertIn(expected_message, import_log_text)
dabbb2a4a493 [datafeed] Complete the import log even if parser could not be found
Denis Laxalde <denis.laxalde@logilab.fr>
parents: 11252
diff changeset
   167
9822
4a118bfd6ab4 [datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9798
diff changeset
   168
4a118bfd6ab4 [datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9798
diff changeset
   169
class DataFeedConfigTC(CubicWebTC):
    """Tests about options given through a datafeed source's `config` text."""

    def test_use_cwuri_as_url_override(self):
        """Check that `use-cwuri-as-url=no` in the config is taken into account.

        After creating and committing the CWSource entity, the repository
        source object must have `use_cwuri_as_url` set to False and expose
        that value in its public configuration.
        """
        source_attributes = {
            'name': u'myfeed',
            'type': u'datafeed',
            'parser': u'testparser',
            'url': u'ignored',
            'config': u'use-cwuri-as-url=no',
        }
        with self.admin_access.client_cnx() as cnx:
            cnx.create_entity('CWSource', **source_attributes)
            cnx.commit()
        feed_source = self.repo.sources_by_uri['myfeed']
        self.assertEqual(feed_source.use_cwuri_as_url, False)
        expected_public_config = {
            'type': u'datafeed',
            'uri': u'myfeed',
            'use-cwuri-as-url': False,
        }
        self.assertEqual(expected_public_config, feed_source.public_config)
4a118bfd6ab4 [datafeed] Allow to override use_cwuri_as_url in configuration of a datafeed source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents: 9798
diff changeset
   181
6957
ffda12be2e9f [repository] #1460066: backport datafeed cube as cubicweb source
Sylvain Thénault <sylvain.thenault@logilab.fr>
parents:
diff changeset
   182
if __name__ == '__main__':
    # run this module's tests when it is executed as a script
    from logilab.common import testlib
    testlib.unittest_main()