[datafeed parser] fix retrieve_url to always return urllib2.urlopen-compatible output,
since callers should be able to call geturl/getcode/info on the returned
object whatever kind of URL was given.
Related to #3682069
--- a/server/sources/datafeed.py Thu Mar 20 08:55:44 2014 +0100
+++ b/server/sources/datafeed.py Tue Mar 25 09:20:37 2014 +0100
@@ -324,8 +324,8 @@
self.source.info('GET %s', url)
return _OPENER.open(url, timeout=self.source.http_timeout)
if url.startswith('file://'):
- return open(url[7:])
- return StringIO.StringIO(url)
+ return URLLibResponseAdapter(open(url[7:]), url)
+ return URLLibResponseAdapter(StringIO.StringIO(url), url)
def add_schema_config(self, schemacfg, checkonly=False):
"""added CWSourceSchemaConfig, modify mapping accordingly"""
@@ -490,6 +490,27 @@
return exists(extid[7:])
return False
+
+class URLLibResponseAdapter(object):
+ """Thin wrapper to be used to fake a value returned by urllib2.urlopen"""
+ def __init__(self, stream, url, code=200):
+ self._stream = stream
+ self._url = url
+ self.code = code
+
+ def read(self, *args):
+ return self._stream.read(*args)
+
+ def geturl(self):
+ return self._url
+
+ def getcode(self):
+ return self.code
+
+ def info(self):
+ from mimetools import Message
+ return Message(StringIO.StringIO())
+
# use a cookie enabled opener to use session cookie if any
_OPENER = urllib2.build_opener()
try:
--- a/server/test/unittest_datafeed.py Thu Mar 20 08:55:44 2014 +0100
+++ b/server/test/unittest_datafeed.py Tue Mar 25 09:20:37 2014 +0100
@@ -16,7 +16,9 @@
# You should have received a copy of the GNU Lesser General Public License along
# with CubicWeb. If not, see <http://www.gnu.org/licenses/>.
+import mimetools
from datetime import timedelta
+from contextlib import contextmanager
from cubicweb.devtools.testlib import CubicWebTC
from cubicweb.server.sources import datafeed
@@ -25,10 +27,30 @@
class DataFeedTC(CubicWebTC):
def setup_database(self):
with self.admin_access.repo_cnx() as cnx:
- cnx.create_entity('CWSource', name=u'myfeed', type=u'datafeed',
- parser=u'testparser', url=u'ignored',
- config=u'synchronization-interval=1min')
- cnx.commit()
+ with self.base_parser(cnx):
+ cnx.create_entity('CWSource', name=u'myfeed', type=u'datafeed',
+ parser=u'testparser', url=u'ignored',
+ config=u'synchronization-interval=1min')
+ cnx.commit()
+
+ @contextmanager
+ def base_parser(self, session):
+ class AParser(datafeed.DataFeedParser):
+ __regid__ = 'testparser'
+ def process(self, url, raise_on_error=False):
+ entity = self.extid2entity('http://www.cubicweb.org/', 'Card',
+ item={'title': u'cubicweb.org',
+ 'content': u'the cw web site'})
+ if not self.created_during_pull(entity):
+ self.notify_updated(entity)
+ def before_entity_copy(self, entity, sourceparams):
+ entity.cw_edited.update(sourceparams['item'])
+
+ with self.temporary_appobjects(AParser):
+ if 'myfeed' in self.repo.sources_by_uri:
+ yield self.repo.sources_by_uri['myfeed']._get_parser(session)
+ else:
+ yield
def test(self):
self.assertIn('myfeed', self.repo.sources_by_uri)
@@ -41,20 +63,8 @@
self.assertEqual(dfsource.synchro_interval, timedelta(seconds=60))
self.assertFalse(dfsource.fresh())
-
- class AParser(datafeed.DataFeedParser):
- __regid__ = 'testparser'
- def process(self, url, raise_on_error=False):
- entity = self.extid2entity('http://www.cubicweb.org/', 'Card',
- item={'title': u'cubicweb.org',
- 'content': u'the cw web site'})
- if not self.created_during_pull(entity):
- self.notify_updated(entity)
- def before_entity_copy(self, entity, sourceparams):
- entity.cw_edited.update(sourceparams['item'])
-
- with self.temporary_appobjects(AParser):
- with self.repo.internal_cnx() as cnx:
+ with self.repo.internal_cnx() as cnx:
+ with self.base_parser(cnx):
stats = dfsource.pull_data(cnx, force=True)
cnx.commit()
# test import stats
@@ -123,6 +133,14 @@
self.assertFalse(cnx.execute('Card X WHERE X title "cubicweb.org"'))
self.assertFalse(cnx.execute('Any X WHERE X has_text "cubicweb.org"'))
+ def test_parser_retrieve_url_local(self):
+ with self.admin_access.repo_cnx() as cnx:
+ with self.base_parser(cnx) as parser:
+ value = parser.retrieve_url('a string')
+ self.assertEqual(200, value.getcode())
+ self.assertEqual('a string', value.geturl())
+ self.assertIsInstance(value.info(), mimetools.Message)
+
class DataFeedConfigTC(CubicWebTC):