server/sources/datafeed.py
changeset 10757 f73a9a884534
parent 10662 10942ed172de
child 10758 a34edc1057c0
equal deleted inserted replaced
10756:3eb527ce5f0f 10757:f73a9a884534
    17 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
    17 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
    18 """datafeed sources: copy data from an external data stream into the system
    18 """datafeed sources: copy data from an external data stream into the system
    19 database
    19 database
    20 """
    20 """
    21 
    21 
    22 import StringIO
    22 from io import BytesIO
    23 from os.path import exists
    23 from os.path import exists
    24 from datetime import datetime, timedelta
    24 from datetime import datetime, timedelta
    25 
    25 
    26 from six.moves.urllib.parse import urlparse
    26 from six.moves.urllib.parse import urlparse
    27 from six.moves.urllib.request import Request, build_opener, HTTPCookieProcessor
    27 from six.moves.urllib.request import Request, build_opener, HTTPCookieProcessor
   344             cnx = cwproxy_for(url)
   344             cnx = cwproxy_for(url)
   345             cnx.timeout = self.source.http_timeout
   345             cnx.timeout = self.source.http_timeout
   346             self.source.info('Using cwclientlib for %s' % url)
   346             self.source.info('Using cwclientlib for %s' % url)
   347             resp = cnx.get(url)
   347             resp = cnx.get(url)
   348             resp.raise_for_status()
   348             resp.raise_for_status()
   349             return URLLibResponseAdapter(StringIO.StringIO(resp.text), url)
   349             return URLLibResponseAdapter(BytesIO(resp.text), url)
   350         except (ImportError, ValueError, EnvironmentError) as exc:
   350         except (ImportError, ValueError, EnvironmentError) as exc:
   351             # ImportError: not available
   351             # ImportError: not available
   352             # ValueError: no config entry found
   352             # ValueError: no config entry found
   353             # EnvironmentError: no cwclientlib config file found
   353             # EnvironmentError: no cwclientlib config file found
   354             self.source.debug(str(exc))
   354             self.source.debug(str(exc))
   358             self.source.info('GET %s', url)
   358             self.source.info('GET %s', url)
   359             req = Request(url)
   359             req = Request(url)
   360             return _OPENER.open(req, timeout=self.source.http_timeout)
   360             return _OPENER.open(req, timeout=self.source.http_timeout)
   361 
   361 
   362         # url is probably plain content
   362         # url is probably plain content
   363         return URLLibResponseAdapter(StringIO.StringIO(url), url)
   363         return URLLibResponseAdapter(BytesIO(url.encode('ascii')), url)
   364 
   364 
   365     def add_schema_config(self, schemacfg, checkonly=False):
   365     def add_schema_config(self, schemacfg, checkonly=False):
   366         """added CWSourceSchemaConfig, modify mapping accordingly"""
   366         """added CWSourceSchemaConfig, modify mapping accordingly"""
   367         msg = schemacfg._cw._("this parser doesn't use a mapping")
   367         msg = schemacfg._cw._("this parser doesn't use a mapping")
   368         raise ValidationError(schemacfg.eid, {None: msg})
   368         raise ValidationError(schemacfg.eid, {None: msg})
   557     def getcode(self):
   557     def getcode(self):
   558         return self.code
   558         return self.code
   559 
   559 
   560     def info(self):
   560     def info(self):
   561         from mimetools import Message
   561         from mimetools import Message
   562         return Message(StringIO.StringIO())
   562         return Message(BytesIO())
   563 
   563 
   564 # use a cookie enabled opener to use session cookie if any
   564 # use a cookie enabled opener to use session cookie if any
   565 _OPENER = build_opener()
   565 _OPENER = build_opener()
   566 try:
   566 try:
   567     from logilab.common import urllib2ext
   567     from logilab.common import urllib2ext