cubicweb: comparison server/sources/datafeed.py

equal deleted inserted replaced

-:70ed2067fdb5
+:4c59409220b6
 import StringIO
 from os.path import exists
 from datetime import datetime, timedelta
 from base64 import b64decode
 from cookielib import CookieJar
+import urlparse
 from lxml import etree
 from cubicweb import RegistryNotFound, ObjectNotFound, ValidationError, UnknownEid
 from cubicweb.server.repository import preprocess_inlined_relations
 from cubicweb.server.sources import AbstractSource
 for mappedurl in URL_MAPPING:
 if url.startswith(mappedurl):
 return url.replace(mappedurl, URL_MAPPING[mappedurl], 1)
 return url
def retrieve_url(self, url):
    """Return stream linked by the given url:

    * HTTP urls will be normalized (see :meth:`normalize_url`)
    * handle file:// URL
    * other will be considered as plain content, useful for testing purpose

    For http URLs, it will try to find a cwclientlib config entry
    (if available) and use it as requester.

    The returned object is either an `URLLibResponseAdapter` (file://
    URLs, cwclientlib responses and plain content) or whatever
    `_OPENER.open` returns (plain urllib2 fallback).
    """
    purl = urlparse.urlparse(url)
    if purl.scheme == 'file':
        # drop the 7 leading characters ('file://') to get the local path
        return URLLibResponseAdapter(open(url[7:]), url)
    url = self.normalize_url(url)
    # first, try to use cwclientlib if it's available and if the
    # url matches a configuration entry in ~/.config/cwclientlibrc
    try:
        from cwclientlib import cwproxy_for
        cnx = cwproxy_for(url)
        cnx.timeout = self.source.http_timeout
        # pass the url as a logging argument, like the 'GET %s' call below
        self.source.info('Using cwclientlib for %s', url)
        resp = cnx.get(url)
        resp.raise_for_status()
        return URLLibResponseAdapter(StringIO.StringIO(resp.text), url)
    except (ImportError, ValueError) as exc:
        # ImportError: not available
        # ValueError: no config entry found
        self.source.debug(str(exc))
    # no chance with cwclientlib, fall back to former implementation
    # NOTE: `purl` was parsed *before* normalization, so the scheme tested
    # here is the one of the url as given by the caller, not of the
    # normalized url that is actually requested.
    if purl.scheme in ('http', 'https'):
        self.source.info('GET %s', url)
        req = urllib2.Request(url)
        return _OPENER.open(req, timeout=self.source.http_timeout)
    # url is probably plain content
    return URLLibResponseAdapter(StringIO.StringIO(url), url)
 def add_schema_config(self, schemacfg, checkonly=False):
 """added CWSourceSchemaConfig, modify mapping accordingly"""
 msg = schemacfg._cw._("this parser doesn't use a mapping")
 def process_item(self, *args, **kwargs):
     """Placeholder accepting any arguments and always raising
     NotImplementedError (presumably meant to be overridden by concrete
     parsers -- confirm against subclasses).
     """
     raise NotImplementedError()
 def is_deleted(self, extid, etype, eid):
     """Tell whether the external entity identified by `extid` is gone.

     * file:// extids are answered from the local filesystem
     * other extids are normalized (see :meth:`normalize_url`), then
       checked through cwclientlib when it is importable and has a
       matching configuration entry, else through a plain urllib2
       request for http(s) urls; a 404 answer means "deleted"

     `etype` and `eid` are not used by this implementation.
     """
     if extid.startswith('file://'):
         # NOTE(review): this returns True when the file *exists*, which
         # looks inverted for "is deleted"; kept as-is since it matches the
         # previous implementation -- confirm the intended semantics.
         return exists(extid[7:])
     url = self.normalize_url(extid)
     # first, try to use cwclientlib if it's available and if the
     # url matches a configuration entry in ~/.config/cwclientlibrc
     try:
         from cwclientlib import cwproxy_for
         cnx = cwproxy_for(url)
         cnx.timeout = self.source.http_timeout
         self.source.info('Using cwclientlib for checking %s', url)
         return cnx.get(url).status_code == 404
     except (ImportError, ValueError) as exc:
         # ImportError: not available
         # ValueError: no config entry found
         self.source.debug(str(exc))
     # no chance with cwclientlib, fall back to former implementation
     # (the scheme is taken from the url parsed again after normalization)
     if urlparse.urlparse(url).scheme in ('http', 'https'):
         try:
             _OPENER.open(url, timeout=self.source.http_timeout)
         except urllib2.HTTPError as ex:
             if ex.code == 404:
                 return True
     return False
 class URLLibResponseAdapter(object):
 """Thin wrapper to be used to fake a value returned by urllib2.urlopen"""

changeset 10516	4c59409220b6
parent 10143	2eb06486273a
child 10522	1660a0fa4f43