[web] implement cross origin resource sharing (CORS) (closes #2491768)
authorNicolas Chauvat <nicolas.chauvat@logilab.fr>
Fri, 14 Mar 2014 11:20:53 +0100
changeset 9571 aaf83cc07eed
parent 9570 14452b344d19
child 9572 73b2410bdadc
[web] implement cross origin resource sharing (CORS) (closes #2491768) Partial implementation that is enough to get started but leaves out some of the advanced features like caching and non-simple methods and headers.
doc/3.19.rst
doc/book/en/admin/instance-config.rst
web/application.py
web/cors.py
web/http_headers.py
web/test/unittest_http.py
web/webconfig.py
--- a/doc/3.19.rst	Wed Mar 12 16:02:44 2014 +0100
+++ b/doc/3.19.rst	Fri Mar 14 11:20:53 2014 +0100
@@ -1,6 +1,13 @@
 What's new in CubicWeb 3.19?
 ============================
 
+New functionalities
+--------------------
+
+* implement Cross Origin Resource Sharing (CORS)
+  (see `#2491768 <http://www.cubicweb.org/2491768>`_)
+
+
 Behaviour Changes
 -----------------
 
--- a/doc/book/en/admin/instance-config.rst	Wed Mar 12 16:02:44 2014 +0100
+++ b/doc/book/en/admin/instance-config.rst	Fri Mar 14 11:20:53 2014 +0100
@@ -189,3 +189,38 @@
 :`navigation.combobox-limit`:
     number of entities unrelated to show up on the drop-down lists of
     the sight on an editing entity view
+
+Cross-Origin Resource Sharing
+-----------------------------
+
+CubicWeb provides some support for the CORS_ protocol. For now, the
+provided implementation only deals with access to a CubicWeb instance
+as a whole. Support for a finer granularity may be considered in the
+future.
+
+Specificities of the provided implementation:
+
+- ``Access-Control-Allow-Credentials`` is always true
+- ``Access-Control-Allow-Origin`` header in response will never be
+  ``*``
+- ``Access-Control-Expose-Headers`` can be configured globally (see below)
+- ``Access-Control-Max-Age`` has a global option (see below), not yet sent in responses
+- ``Access-Control-Allow-Methods`` can be configured globally (see below)
+- ``Access-Control-Allow-Headers`` can be configured globally (see below)
+
+
+A few parameters can be set to configure the CORS_ capabilities of CubicWeb.
+
+.. _CORS: http://www.w3.org/TR/cors/
+
+:`access-control-allow-origin`:
+   comma-separated list of allowed origin domains or "*" for any domain
+:`access-control-allow-methods`:
+   comma-separated list of allowed HTTP methods
+:`access-control-max-age`:
+   maximum age of cross-origin resource sharing (in seconds)
+:`access-control-allow-headers`:
+   comma-separated list of allowed HTTP custom headers (used in preflight requests)
+:`access-control-expose-headers`:
+   comma-separated list of HTTP custom headers exposed to the client (used in simple requests)
+
--- a/web/application.py	Wed Mar 12 16:02:44 2014 +0100
+++ b/web/application.py	Fri Mar 14 11:20:53 2014 +0100
@@ -36,7 +36,7 @@
     AuthenticationError, NoSelectableObject,
     BadConnectionId, CW_EVENT_MANAGER)
 from cubicweb.repoapi import anonymous_cnx
-from cubicweb.web import LOGGER, component
+from cubicweb.web import LOGGER, component, cors
 from cubicweb.web import (
     StatusResponse, DirectResponse, Redirect, NotFound, LogOut,
     RemoteCallFailed, InvalidSession, RequestError, PublishException)
@@ -415,6 +415,7 @@
                     content = self.need_login_content(req)
         return content
 
+
     def core_handle(self, req, path):
         """method called by the main publisher to process <path>
 
@@ -440,6 +441,8 @@
         try:
             ### standard processing of the request
             try:
+                # apply CORS sanity checks
+                cors.process_request(req, self.vreg.config)
                 ctrlid, rset = self.url_resolver.process(req, path)
                 try:
                     controller = self.vreg['controllers'].select(ctrlid, req,
@@ -448,6 +451,10 @@
                     raise Unauthorized(req._('not authorized'))
                 req.update_search_state()
                 result = controller.publish(rset=rset)
+            except cors.CORSPreflight:
+                # Return directly an empty 200
+                req.status_out = 200
+                result = ''
             except StatusResponse as ex:
                 warn('[3.16] StatusResponse is deprecated use req.status_out',
                      DeprecationWarning, stacklevel=2)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/cors.py	Fri Mar 14 11:20:53 2014 +0100
@@ -0,0 +1,114 @@
+# -*- coding: utf-8 -*-
+# copyright 2014 Logilab, PARIS
+
+"""A set of utility functions to handle CORS requests
+
+Unless specified, all references in this file are related to:
+  http://www.w3.org/TR/cors
+
+The provided implementation roughly follows:
+  http://www.html5rocks.com/static/images/cors_server_flowchart.png
+
+See also:
+  https://developer.mozilla.org/en-US/docs/HTTP/Access_control_CORS
+
+"""
+
+import urlparse
+
+from cubicweb.web import LOGGER
+info = LOGGER.info
+
+class CORSFailed(Exception):
+    """Raised when cross origin resource sharing checks failed"""
+
+
+class CORSPreflight(Exception):
+    """Raised when cross origin resource sharing checks detects the
+    request as a valid preflight request"""
+
+
+def process_request(req, config):
+    """
+    Process a request to apply CORS specification algorithms
+
+    Check whether the CORS specification is respected and set corresponding
+    headers to ensure response complies with the specification. A failed
+    check (CORSFailed) is only logged and no CORS-related header is set;
+    a valid preflight request is signalled by re-raising CORSPreflight.
+    """
+    base_url = urlparse.urlsplit(req.base_url())
+    expected_host = '://'.join((base_url.scheme, base_url.netloc))
+    if not req.get_header('Origin') or req.get_header('Origin') == expected_host:
+        # not a CORS request, nothing to do
+        return
+    try:
+        # handle cross origin resource sharing (CORS)
+        if req.http_method() == 'OPTIONS':
+            if req.get_header('Access-Control-Request-Method'):
+                # preflight CORS request
+                process_preflight(req, config)
+        else: # Simple CORS or actual request
+            process_simple(req, config)
+    except CORSFailed, exc:
+        info('Cross origin resource sharing failed: %s' % exc)
+    except CORSPreflight:
+        info('Cross origin resource sharing: valid Preflight request')
+        raise
+
+def process_preflight(req, config):
+    """cross origin resource sharing (preflight)
+    Cf http://www.w3.org/TR/cors/#resource-preflight-requests
+    """
+    origin = check_origin(req, config)
+    allowed_methods = set(config['access-control-allow-methods'])
+    allowed_headers = set(config['access-control-allow-headers'])
+    try:
+        method = req.get_header('Access-Control-Request-Method')
+    except ValueError:
+        raise CORSFailed('Access-Control-Request-Method is incorrect')
+    if method not in allowed_methods:
+        raise CORSFailed('Method is not allowed')
+    try:
+        req.get_header('Access-Control-Request-Headers', ())
+    except ValueError:
+        raise CORSFailed('Access-Control-Request-Headers is incorrect')
+    req.set_header('Access-Control-Allow-Methods', allowed_methods, raw=False)
+    req.set_header('Access-Control-Allow-Headers', allowed_headers, raw=False)
+
+    process_common(req, config, origin)
+    raise CORSPreflight()
+
+def process_simple(req, config):
+    """Handle the Simple Cross-Origin Request case
+    """
+    origin = check_origin(req, config)
+    exposed_headers = config['access-control-expose-headers']
+    if exposed_headers:
+        req.set_header('Access-Control-Expose-Headers', exposed_headers, raw=False)
+    process_common(req, config, origin)
+
+def process_common(req, config, origin):
+    req.set_header('Access-Control-Allow-Origin', origin)
+    # in CW, we always support credential/authentication
+    req.set_header('Access-Control-Allow-Credentials', 'true')
+
+def check_origin(req, config):
+    origin = req.get_header('Origin').lower()
+    allowed_origins = config.get('access-control-allow-origin')
+    if not allowed_origins:
+        raise CORSFailed('access-control-allow-origin is not configured')
+    if '*' not in allowed_origins and origin not in allowed_origins:
+        raise CORSFailed('Origin is not allowed')
+    # bit of sanity check; see "6.3 Security"
+    myhost = urlparse.urlsplit(req.base_url()).netloc
+    host = req.get_header('Host')
+    if host != myhost:
+        info('cross origin resource sharing detected possible '
+             'DNS rebinding attack Host header != host of base_url: '
+             '%s != %s' % (host, myhost))
+        raise CORSFailed('Host header and hostname do not match')
+    # include "Vary: Origin" header (see 6.4)
+    req.set_header('Vary', 'Origin')
+    return origin
+
--- a/web/http_headers.py	Wed Mar 12 16:02:44 2014 +0100
+++ b/web/http_headers.py	Fri Mar 14 11:20:53 2014 +0100
@@ -8,6 +8,7 @@
 from calendar import timegm
 import base64
 import re
+import urlparse
 
 def dashCapitalize(s):
     ''' Capitalize a string, making sure to treat - as a word seperator '''
@@ -388,6 +389,35 @@
         raise ValueError('single value required, not %s' % seq)
     return seq[0]
 
+def parseHTTPMethod(method):
+    """Return the stripped method if it is an RFC 2616 method (extension methods raise ValueError)"""
+    method = method.strip()
+    if method not in ("OPTIONS", "GET", "HEAD", "POST", "PUT", "DELETE",
+                      "TRACE", "CONNECT"):
+        raise ValueError('Unsupported HTTP method %s' % method)
+    return method
+
+def parseAllowOrigin(origin):
+    """Return origin if it is 'null' or a bare http(s)://host[/] URL, else raise ValueError"""
+    if origin == 'null':
+        return origin
+    p = urlparse.urlparse(origin)
+    if p.params or p.query or p.username or p.path not in ('', '/'):
+        raise ValueError('Incorrect Access-Control-Allow-Origin value %s' % origin)
+    if p.scheme not in ('http', 'https'):
+        raise ValueError('Unsupported Access-Control-Allow-Origin URL scheme %s' % origin)
+    if not p.netloc:
+        raise ValueError('Access-Control-Allow-Origin: host name cannot be unset  (%s)' % origin)
+    return origin
+
+def parseAllowCreds(cred):
+    """Lower-case the value and return it; any non-empty value other than "true" raises ValueError"""
+    if cred:
+        cred = cred.lower()
+    if cred and cred != 'true':
+        raise ValueError('Access-Control-Allow-Credentials can only be "true" (%s)' % cred)
+    return cred
+
 ##### Generation utilities
 def quoteString(s):
     return '"%s"' % s.replace('\\', '\\\\').replace('"', '\\"')
@@ -1454,6 +1484,12 @@
     'Accept-Charset': (tokenize, listParser(parseAcceptQvalue), dict, addDefaultCharset),
     'Accept-Encoding':(tokenize, listParser(parseAcceptQvalue), dict, addDefaultEncoding),
     'Accept-Language':(tokenize, listParser(parseAcceptQvalue), dict),
+    'Access-Control-Allow-Origin': (last, parseAllowOrigin,),
+    'Access-Control-Allow-Credentials': (last, parseAllowCreds,),
+    'Access-Control-Allow-Methods': (tokenize, listParser(parseHTTPMethod), list),
+    'Access-Control-Request-Method': (parseHTTPMethod, ),
+    'Access-Control-Request-Headers': (filterTokens, ),
+    'Access-Control-Expose-Headers': (filterTokens, ),
     'Authorization': (last, parseAuthorization),
     'Cookie':(parseCookie,),
     'Expect':(tokenize, listParser(parseExpect), dict),
@@ -1465,6 +1501,7 @@
     'If-Range':(parseIfRange,),
     'If-Unmodified-Since':(last,parseDateTime),
     'Max-Forwards':(last,int),
+    'Origin': (last,),
 #    'Proxy-Authorization':str, # what is "credentials"
     'Range':(tokenize, parseRange),
     'Referer':(last,str), # TODO: URI object?
@@ -1477,11 +1514,15 @@
     'Accept-Charset': (iteritems, listGenerator(generateAcceptQvalue),singleHeader),
     'Accept-Encoding': (iteritems, removeDefaultEncoding, listGenerator(generateAcceptQvalue),singleHeader),
     'Accept-Language': (iteritems, listGenerator(generateAcceptQvalue),singleHeader),
+    'Access-Control-Request-Method': (unique, str, singleHeader, ),
+    'Access-Control-Expose-Headers': (listGenerator(str), ),
+    'Access-Control-Allow-Headers': (listGenerator(str), ),
     'Authorization': (generateAuthorization,), # what is "credentials"
     'Cookie':(generateCookie,singleHeader),
     'Expect':(iteritems, listGenerator(generateExpect), singleHeader),
     'From':(unique, str,singleHeader),
     'Host':(unique, str,singleHeader),
+    'Origin': (unique, str, singleHeader),
     'If-Match':(listGenerator(generateStarOrETag), singleHeader),
     'If-Modified-Since':(generateDateTime,singleHeader),
     'If-None-Match':(listGenerator(generateStarOrETag), singleHeader),
--- a/web/test/unittest_http.py	Wed Mar 12 16:02:44 2014 +0100
+++ b/web/test/unittest_http.py	Fri Mar 14 11:20:53 2014 +0100
@@ -15,9 +15,13 @@
 #
 # You should have received a copy of the GNU Lesser General Public License along
 # with CubicWeb.  If not, see <http://www.gnu.org/licenses/>.
+
+import contextlib
+
 from logilab.common.testlib import TestCase, unittest_main, tag, Tags
 
 from cubicweb.devtools.fake import FakeRequest
+from cubicweb.devtools.testlib import CubicWebTC
 
 
 def _test_cache(hin, hout, method='GET'):
@@ -290,5 +294,167 @@
         self.assertEqual(value, [DATE])
 
 
+alloworig = 'access-control-allow-origin'
+allowmethods = 'access-control-allow-methods'
+allowheaders = 'access-control-allow-headers'
+allowcreds = 'access-control-allow-credentials'
+exposeheaders = 'access-control-expose-headers'
+maxage = 'access-control-max-age'
+
+requestmethod = 'access-control-request-method'
+requestheaders = 'access-control-request-headers'
+
+class _BaseAccessHeadersTC(CubicWebTC):
+
+    @contextlib.contextmanager
+    def options(self, **options):
+        for k, values in options.items():
+            self.config.set_option(k, values)
+        try:
+            yield
+        finally:
+            for k in options:
+                self.config.set_option(k, '')
+    def check_no_cors(self, req):
+        self.assertEqual(None, req.get_response_header(alloworig))
+        self.assertEqual(None, req.get_response_header(allowmethods))
+        self.assertEqual(None, req.get_response_header(allowheaders))
+        self.assertEqual(None, req.get_response_header(allowcreds))
+        self.assertEqual(None, req.get_response_header(exposeheaders))
+        self.assertEqual(None, req.get_response_header(maxage))
+
+
+class SimpleAccessHeadersTC(_BaseAccessHeadersTC):
+
+    def test_noaccess(self):
+        with self.admin_access.web_request() as req:
+            data = self.app_handle_request(req)
+            self.check_no_cors(req)
+
+    def test_noorigin(self):
+        with self.options(**{alloworig: '*'}):
+            with self.admin_access.web_request() as req:
+                req = self.request()
+                data = self.app_handle_request(req)
+                self.check_no_cors(req)
+
+    def test_origin_noaccess(self):
+        with self.admin_access.web_request() as req:
+            req.set_request_header('Origin', 'http://www.cubicweb.org')
+            data = self.app_handle_request(req)
+            self.check_no_cors(req)
+
+    def test_origin_noaccess_bad_host(self):
+        with self.options(**{alloworig: '*'}):
+            with self.admin_access.web_request() as req:
+                req.set_request_header('Origin', 'http://www.cubicweb.org')
+                # in these tests, base_url is http://testing.fr/cubicweb/
+                req.set_request_header('Host', 'badhost.net')
+                data = self.app_handle_request(req)
+                self.check_no_cors(req)
+
+    def test_explicit_origin_noaccess(self):
+        with self.options(**{alloworig: ['http://www.toto.org', 'http://othersite.fr']}):
+            with self.admin_access.web_request() as req:
+                req.set_request_header('Origin', 'http://www.cubicweb.org')
+                # in these tests, base_url is http://testing.fr/cubicweb/
+                req.set_request_header('Host', 'testing.fr')
+                data = self.app_handle_request(req)
+                self.check_no_cors(req)
+
+    def test_origin_access(self):
+        with self.options(**{alloworig: '*'}):
+            with self.admin_access.web_request() as req:
+                req.set_request_header('Origin', 'http://www.cubicweb.org')
+                # in these tests, base_url is http://testing.fr/cubicweb/
+                req.set_request_header('Host', 'testing.fr')
+                data = self.app_handle_request(req)
+                self.assertEqual('http://www.cubicweb.org',
+                                 req.get_response_header(alloworig))
+
+    def test_explicit_origin_access(self):
+        with self.options(**{alloworig: ['http://www.cubicweb.org', 'http://othersite.fr']}):
+            with self.admin_access.web_request() as req:
+                req.set_request_header('Origin', 'http://www.cubicweb.org')
+                # in these tests, base_url is http://testing.fr/cubicweb/
+                req.set_request_header('Host', 'testing.fr')
+                data = self.app_handle_request(req)
+                self.assertEqual('http://www.cubicweb.org',
+                                 req.get_response_header(alloworig))
+
+    def test_origin_access_headers(self):
+        with self.options(**{alloworig: '*',
+                             exposeheaders: ['ExposeHead1', 'ExposeHead2'],
+                             allowheaders: ['AllowHead1', 'AllowHead2'],
+                             allowmethods: ['GET', 'POST', 'OPTIONS']}):
+            with self.admin_access.web_request() as req:
+                req.set_request_header('Origin', 'http://www.cubicweb.org')
+                # in these tests, base_url is http://testing.fr/cubicweb/
+                req.set_request_header('Host', 'testing.fr')
+                data = self.app_handle_request(req)
+                self.assertEqual('http://www.cubicweb.org',
+                                 req.get_response_header(alloworig))
+                self.assertEqual("true",
+                                 req.get_response_header(allowcreds))
+                self.assertEqual(['ExposeHead1', 'ExposeHead2'],
+                                 req.get_response_header(exposeheaders))
+                self.assertEqual(None, req.get_response_header(allowmethods))
+                self.assertEqual(None, req.get_response_header(allowheaders))
+
+
+class PreflightAccessHeadersTC(_BaseAccessHeadersTC):
+
+    def test_noaccess(self):
+        with self.admin_access.web_request(method='OPTIONS') as req:
+            data = self.app_handle_request(req)
+            self.check_no_cors(req)
+
+    def test_noorigin(self):
+        with self.options(**{alloworig: '*'}):
+            with self.admin_access.web_request(method='OPTIONS') as req:
+                req = self.request()
+                data = self.app_handle_request(req)
+                self.check_no_cors(req)
+
+    def test_origin_noaccess(self):
+        with self.admin_access.web_request(method='OPTIONS') as req:
+            req.set_request_header('Origin', 'http://www.cubicweb.org')
+            data = self.app_handle_request(req)
+            self.check_no_cors(req)
+
+    def test_origin_noaccess_bad_host(self):
+        with self.options(**{alloworig: '*'}):
+            with self.admin_access.web_request(method='OPTIONS') as req:
+                req.set_request_header('Origin', 'http://www.cubicweb.org')
+                # in these tests, base_url is http://testing.fr/cubicweb/
+                req.set_request_header('Host', 'badhost.net')
+                data = self.app_handle_request(req)
+                self.check_no_cors(req)
+
+    def test_origin_access(self):
+        with self.options(**{alloworig: '*',
+                             exposeheaders: ['ExposeHead1', 'ExposeHead2'],
+                             allowheaders: ['AllowHead1', 'AllowHead2'],
+                             allowmethods: ['GET', 'POST', 'OPTIONS']}):
+            with self.admin_access.web_request(method='OPTIONS') as req:
+                req.set_request_header('Origin', 'http://www.cubicweb.org')
+                # in these tests, base_url is http://testing.fr/cubicweb/
+                req.set_request_header('Host', 'testing.fr')
+                req.set_request_header(requestmethod, 'GET')
+
+                data = self.app_handle_request(req)
+                self.assertEqual(200, req.status_out)
+                self.assertEqual('http://www.cubicweb.org',
+                                 req.get_response_header(alloworig))
+                self.assertEqual("true",
+                                 req.get_response_header(allowcreds))
+                self.assertEqual(set(['GET', 'POST', 'OPTIONS']),
+                                 req.get_response_header(allowmethods))
+                self.assertEqual(set(['AllowHead1', 'AllowHead2']),
+                                 req.get_response_header(allowheaders))
+                self.assertEqual(None,
+                                 req.get_response_header(exposeheaders))
+
+
 if __name__ == '__main__':
     unittest_main()
--- a/web/webconfig.py	Wed Mar 12 16:02:44 2014 +0100
+++ b/web/webconfig.py	Fri Mar 14 11:20:53 2014 +0100
@@ -247,6 +247,36 @@
           'help': 'The static data resource directory path.',
           'group': 'web', 'level': 2,
           }),
+        ('access-control-allow-origin',
+         {'type' : 'csv',
+          'default': (),
+          'help':('comma-separated list of allowed origin domains or "*" for any domain'),
+          'group': 'web', 'level': 2,
+          }),
+        ('access-control-allow-methods',
+         {'type' : 'csv',
+          'default': (),
+          'help': ('comma-separated list of allowed HTTP methods'),
+          'group': 'web', 'level': 2,
+          }),
+        ('access-control-max-age',
+         {'type' : 'int',
+          'default': None,
+          'help': ('maximum age of cross-origin resource sharing (in seconds)'),
+          'group': 'web', 'level': 2,
+          }),
+        ('access-control-expose-headers',
+         {'type' : 'csv',
+          'default': (),
+          'help':('comma-separated list of HTTP headers the application declare in response to a preflight request'),
+          'group': 'web', 'level': 2,
+          }),
+        ('access-control-allow-headers',
+         {'type' : 'csv',
+          'default': (),
+          'help':('comma-separated list of HTTP headers the application may set in the response'),
+          'group': 'web', 'level': 2,
+          }),
         ))
 
     def __init__(self, *args, **kwargs):