[datafeed parser] fix retrieve_url to always return urllib2.urlopen-compatible output
author Sylvain Thénault <sylvain.thenault@logilab.fr>
Tue, 25 Mar 2014 09:20:37 +0100
changeset 9824 30183ecf5c61
parent 9823 258d2f9f7d39
child 9825 946b483bc8a1
[datafeed parser] fix retrieve_url to always return urllib2.urlopen-compatible output, as one should expect to be able to call geturl/getcode/info on the returned object whatever URL was given. Related to #3682069
server/sources/datafeed.py
server/test/unittest_datafeed.py
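
Illustration: a minimal sketch of the contract this change enforces. The parser instance, the file path and the URLs below are hypothetical; only retrieve_url and the three accessors come from the patch.

    # whichever branch of retrieve_url is taken (HTTP, file:// or raw string),
    # the returned object now honours the urllib2.urlopen interface
    for url in ('http://www.cubicweb.org/', 'file:///tmp/feed.xml', 'raw feed content'):
        response = parser.retrieve_url(url)
        print response.geturl()   # for the fallback branches, the string handed in
        print response.getcode()  # 200 (hard-coded for the fallback branches)
        print response.info()     # a mimetools.Message, empty for the fallbacks
        data = response.read()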
--- a/server/sources/datafeed.py	Thu Mar 20 08:55:44 2014 +0100
+++ b/server/sources/datafeed.py	Tue Mar 25 09:20:37 2014 +0100
@@ -324,8 +324,9 @@
             self.source.info('GET %s', url)
             return _OPENER.open(url, timeout=self.source.http_timeout)
         if url.startswith('file://'):
-            return open(url[7:])
-        return StringIO.StringIO(url)
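+            # wrap the bare file object so geturl/getcode/info keep working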
+            return URLLibResponseAdapter(open(url[7:]), url)
+        return URLLibResponseAdapter(StringIO.StringIO(url), url)
 
     def add_schema_config(self, schemacfg, checkonly=False):
         """added CWSourceSchemaConfig, modify mapping accordingly"""
@@ -490,6 +491,29 @@
             return exists(extid[7:])
         return False
 
+
+class URLLibResponseAdapter(object):
+    """Thin wrapper to be used to fake a value returned by urllib2.urlopen"""
+    def __init__(self, stream, url, code=200):
+        self._stream = stream
+        self._url = url
+        self.code = code
+
+    def read(self, *args):
+        return self._stream.read(*args)
+
+    def geturl(self):
+        return self._url
+
+    def getcode(self):
+        return self.code
+
+    def info(self):
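+        # urllib2 responses expose their headers as a mimetools.Message;
+        # return an empty one since there are no real HTTP headers here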
+        from mimetools import Message
+        return Message(StringIO.StringIO())
+
 # use a cookie enabled opener to use session cookie if any
 _OPENER = urllib2.build_opener()
 try:
--- a/server/test/unittest_datafeed.py	Thu Mar 20 08:55:44 2014 +0100
+++ b/server/test/unittest_datafeed.py	Tue Mar 25 09:20:37 2014 +0100
@@ -16,7 +16,9 @@
 # You should have received a copy of the GNU Lesser General Public License along
 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
 
+import mimetools
 from datetime import timedelta
+from contextlib import contextmanager
 
 from cubicweb.devtools.testlib import CubicWebTC
 from cubicweb.server.sources import datafeed
@@ -25,10 +27,31 @@
 class DataFeedTC(CubicWebTC):
     def setup_database(self):
         with self.admin_access.repo_cnx() as cnx:
-            cnx.create_entity('CWSource', name=u'myfeed', type=u'datafeed',
-                              parser=u'testparser', url=u'ignored',
-                              config=u'synchronization-interval=1min')
-            cnx.commit()
+            with self.base_parser(cnx):
+                cnx.create_entity('CWSource', name=u'myfeed', type=u'datafeed',
+                                  parser=u'testparser', url=u'ignored',
+                                  config=u'synchronization-interval=1min')
+                cnx.commit()
+
+    @contextmanager
+    def base_parser(self, session):
+        class AParser(datafeed.DataFeedParser):
+            __regid__ = 'testparser'
+            def process(self, url, raise_on_error=False):
+                entity = self.extid2entity('http://www.cubicweb.org/', 'Card',
+                                           item={'title': u'cubicweb.org',
+                                                 'content': u'the cw web site'})
+                if not self.created_during_pull(entity):
+                    self.notify_updated(entity)
+            def before_entity_copy(self, entity, sourceparams):
+                entity.cw_edited.update(sourceparams['item'])
+
+        with self.temporary_appobjects(AParser):
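+            # 'myfeed' is not registered yet when this is called from setup_database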
+            if 'myfeed' in self.repo.sources_by_uri:
+                yield self.repo.sources_by_uri['myfeed']._get_parser(session)
+            else:
+                yield
 
     def test(self):
         self.assertIn('myfeed', self.repo.sources_by_uri)
@@ -41,20 +64,8 @@
         self.assertEqual(dfsource.synchro_interval, timedelta(seconds=60))
         self.assertFalse(dfsource.fresh())
 
-
-        class AParser(datafeed.DataFeedParser):
-            __regid__ = 'testparser'
-            def process(self, url, raise_on_error=False):
-                entity = self.extid2entity('http://www.cubicweb.org/', 'Card',
-                                           item={'title': u'cubicweb.org',
-                                                 'content': u'the cw web site'})
-                if not self.created_during_pull(entity):
-                    self.notify_updated(entity)
-            def before_entity_copy(self, entity, sourceparams):
-                entity.cw_edited.update(sourceparams['item'])
-
-        with self.temporary_appobjects(AParser):
-            with self.repo.internal_cnx() as cnx:
+        with self.repo.internal_cnx() as cnx:
+            with self.base_parser(cnx):
                 stats = dfsource.pull_data(cnx, force=True)
                 cnx.commit()
                 # test import stats
@@ -123,6 +134,15 @@
             self.assertFalse(cnx.execute('Card X WHERE X title "cubicweb.org"'))
             self.assertFalse(cnx.execute('Any X WHERE X has_text "cubicweb.org"'))
 
+    def test_parser_retrieve_url_local(self):
+        with self.admin_access.repo_cnx() as cnx:
+            with self.base_parser(cnx) as parser:
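+                # 'a string' is neither an http nor a file:// url, so it is wrapped as raw data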
+                value = parser.retrieve_url('a string')
+                self.assertEqual(200, value.getcode())
+                self.assertEqual('a string', value.geturl())
+                self.assertIsInstance(value.info(), mimetools.Message)
+
 
 class DataFeedConfigTC(CubicWebTC):