# HG changeset patch # User Sylvain Thénault # Date 1395735637 -3600 # Node ID 30183ecf5c619bda8145061a9a9ec3139812e93e # Parent 258d2f9f7d39e8abec7ab371a9e2088a06e8ad82 [datafeed parser] fix retrieve_url to always return urllib2.urlopen-compatible output, as one should expect to be able to call geturl/getcode/info on the returned object whatever URL was given. Related to #3682069 diff -r 258d2f9f7d39 -r 30183ecf5c61 server/sources/datafeed.py --- a/server/sources/datafeed.py Thu Mar 20 08:55:44 2014 +0100 +++ b/server/sources/datafeed.py Tue Mar 25 09:20:37 2014 +0100 @@ -324,8 +324,8 @@ self.source.info('GET %s', url) return _OPENER.open(url, timeout=self.source.http_timeout) if url.startswith('file://'): - return open(url[7:]) - return StringIO.StringIO(url) + return URLLibResponseAdapter(open(url[7:]), url) + return URLLibResponseAdapter(StringIO.StringIO(url), url) def add_schema_config(self, schemacfg, checkonly=False): """added CWSourceSchemaConfig, modify mapping accordingly""" @@ -490,6 +490,27 @@ return exists(extid[7:]) return False + +class URLLibResponseAdapter(object): + """Thin wrapper to be used to fake a value returned by urllib2.urlopen""" + def __init__(self, stream, url, code=200): + self._stream = stream + self._url = url + self.code = code + + def read(self, *args): + return self._stream.read(*args) + + def geturl(self): + return self._url + + def getcode(self): + return self.code + + def info(self): + from mimetools import Message + return Message(StringIO.StringIO()) + # use a cookie enabled opener to use session cookie if any _OPENER = urllib2.build_opener() try: diff -r 258d2f9f7d39 -r 30183ecf5c61 server/test/unittest_datafeed.py --- a/server/test/unittest_datafeed.py Thu Mar 20 08:55:44 2014 +0100 +++ b/server/test/unittest_datafeed.py Tue Mar 25 09:20:37 2014 +0100 @@ -16,7 +16,9 @@ # You should have received a copy of the GNU Lesser General Public License along # with CubicWeb. If not, see <http://www.gnu.org/licenses/>. 
+import mimetools from datetime import timedelta +from contextlib import contextmanager from cubicweb.devtools.testlib import CubicWebTC from cubicweb.server.sources import datafeed @@ -25,10 +27,30 @@ class DataFeedTC(CubicWebTC): def setup_database(self): with self.admin_access.repo_cnx() as cnx: - cnx.create_entity('CWSource', name=u'myfeed', type=u'datafeed', - parser=u'testparser', url=u'ignored', - config=u'synchronization-interval=1min') - cnx.commit() + with self.base_parser(cnx): + cnx.create_entity('CWSource', name=u'myfeed', type=u'datafeed', + parser=u'testparser', url=u'ignored', + config=u'synchronization-interval=1min') + cnx.commit() + + @contextmanager + def base_parser(self, session): + class AParser(datafeed.DataFeedParser): + __regid__ = 'testparser' + def process(self, url, raise_on_error=False): + entity = self.extid2entity('http://www.cubicweb.org/', 'Card', + item={'title': u'cubicweb.org', + 'content': u'the cw web site'}) + if not self.created_during_pull(entity): + self.notify_updated(entity) + def before_entity_copy(self, entity, sourceparams): + entity.cw_edited.update(sourceparams['item']) + + with self.temporary_appobjects(AParser): + if 'myfeed' in self.repo.sources_by_uri: + yield self.repo.sources_by_uri['myfeed']._get_parser(session) + else: + yield def test(self): self.assertIn('myfeed', self.repo.sources_by_uri) @@ -41,20 +63,8 @@ self.assertEqual(dfsource.synchro_interval, timedelta(seconds=60)) self.assertFalse(dfsource.fresh()) - - class AParser(datafeed.DataFeedParser): - __regid__ = 'testparser' - def process(self, url, raise_on_error=False): - entity = self.extid2entity('http://www.cubicweb.org/', 'Card', - item={'title': u'cubicweb.org', - 'content': u'the cw web site'}) - if not self.created_during_pull(entity): - self.notify_updated(entity) - def before_entity_copy(self, entity, sourceparams): - entity.cw_edited.update(sourceparams['item']) - - with self.temporary_appobjects(AParser): - with self.repo.internal_cnx() as 
cnx: + with self.repo.internal_cnx() as cnx: + with self.base_parser(cnx): stats = dfsource.pull_data(cnx, force=True) cnx.commit() # test import stats @@ -123,6 +133,14 @@ self.assertFalse(cnx.execute('Card X WHERE X title "cubicweb.org"')) self.assertFalse(cnx.execute('Any X WHERE X has_text "cubicweb.org"')) + def test_parser_retrieve_url_local(self): + with self.admin_access.repo_cnx() as cnx: + with self.base_parser(cnx) as parser: + value = parser.retrieve_url('a string') + self.assertEqual(200, value.getcode()) + self.assertEqual('a string', value.geturl()) + self.assertIsInstance(value.info(), mimetools.Message) + class DataFeedConfigTC(CubicWebTC):