[datafeed parser] factor out retrieve_url method from DataFeedXMLParser.parse
author: Sylvain Thénault <sylvain.thenault@logilab.fr>
Thu, 20 Mar 2014 08:55:44 +0100
changeset 9823 258d2f9f7d39
parent 9822 4a118bfd6ab4
child 9824 30183ecf5c61
[datafeed parser] factor out retrieve_url method from DataFeedXMLParser.parse Related to #3682069
server/sources/datafeed.py
--- a/server/sources/datafeed.py	Thu Mar 20 08:49:10 2014 +0100
+++ b/server/sources/datafeed.py	Thu Mar 20 08:55:44 2014 +0100
@@ -298,12 +298,35 @@
         self.stats = {'created': set(), 'updated': set(), 'checked': set()}
 
     def normalize_url(self, url):
-        from cubicweb.sobjects import URL_MAPPING # available after registration
+        """Normalize a URL by checking whether a replacement for it exists in
+        `cubicweb.sobjects.URL_MAPPING`.
+
+        This dictionary allows redirecting from one host to another, which may
+        be useful, for example, when a test instance uses production data and
+        you want neither to load the external source nor to hack your
+        `/etc/hosts` file.
+        """
+        # local import mandatory, it's available after registration
+        from cubicweb.sobjects import URL_MAPPING
         for mappedurl in URL_MAPPING:
             if url.startswith(mappedurl):
                 return url.replace(mappedurl, URL_MAPPING[mappedurl], 1)
         return url
 
+    def retrieve_url(self, url):
+        """Return a stream for the given url:
+        * HTTP urls are normalized (see :meth:`normalize_url`) then fetched
+        * file:// urls are opened as local files
+        * anything else is treated as plain content (useful for testing)
+        """
+        if url.startswith('http'):
+            url = self.normalize_url(url)
+            self.source.info('GET %s', url)
+            return _OPENER.open(url, timeout=self.source.http_timeout)
+        if url.startswith('file://'):
+            return open(url[7:])
+        return StringIO.StringIO(url)
+
     def add_schema_config(self, schemacfg, checkonly=False):
         """added CWSourceSchemaConfig, modify mapping accordingly"""
         msg = schemacfg._cw._("this parser doesn't use a mapping")
@@ -446,14 +469,7 @@
         return error
 
     def parse(self, url):
-        if url.startswith('http'):
-            url = self.normalize_url(url)
-            self.source.info('GET %s', url)
-            stream = _OPENER.open(url, timeout=self.source.http_timeout)
-        elif url.startswith('file://'):
-            stream = open(url[7:])
-        else:
-            stream = StringIO.StringIO(url)
+        stream = self.retrieve_url(url)
         return self.parse_etree(etree.parse(stream).getroot())
 
     def parse_etree(self, document):