# HG changeset patch # User Nicolas Chauvat # Date 1394792453 -3600 # Node ID aaf83cc07eedb3c19e3d96126f9a72cefa2d89fd # Parent 14452b344d19a258ed8a3fd18e5c0ced7c2fa929 [web] implement cross origin resource sharing (CORS) (closes #2491768) Partial implementation that is enough to get started but leaves out some of the advanced features like caching and non-simple methods and headers. diff -r 14452b344d19 -r aaf83cc07eed doc/3.19.rst --- a/doc/3.19.rst Wed Mar 12 16:02:44 2014 +0100 +++ b/doc/3.19.rst Fri Mar 14 11:20:53 2014 +0100 @@ -1,6 +1,13 @@ What's new in CubicWeb 3.19? ============================ +New functionalities +-------------------- + +* implement Cross Origin Resource Sharing (CORS) + (see `#2491768 `_) + + Behaviour Changes ----------------- diff -r 14452b344d19 -r aaf83cc07eed doc/book/en/admin/instance-config.rst --- a/doc/book/en/admin/instance-config.rst Wed Mar 12 16:02:44 2014 +0100 +++ b/doc/book/en/admin/instance-config.rst Fri Mar 14 11:20:53 2014 +0100 @@ -189,3 +189,38 @@ :`navigation.combobox-limit`: number of entities unrelated to show up on the drop-down lists of the sight on an editing entity view + +Cross-Origin Resource Sharing +----------------------------- + +CubicWeb provides some support for the CORS_ protocol. For now, the +provided implementation only deals with access to a CubicWeb instance +as a whole. Support for a finer granularity may be considered in the +future. + +Specificities of the provided implementation: + +- ``Access-Control-Allow-Credentials`` is always true +- ``Access-Control-Allow-Origin`` header in response will never be + ``*`` +- ``Access-Control-Expose-Headers`` can be configured globally (see below) +- ``Access-Control-Max-Age`` can be configured globally (see below) +- ``Access-Control-Allow-Methods`` can be configured globally (see below) +- ``Access-Control-Allow-Headers`` can be configured globally (see below) + + +A few parameters can be set to configure the CORS_ capabilities of CubicWeb. + +.. 
_CORS: http://www.w3.org/TR/cors/ + +:`access-control-allow-origin`: + comma-separated list of allowed origin domains or "*" for any domain +:`access-control-allow-methods`: + comma-separated list of allowed HTTP methods +:`access-control-max-age`: + maximum age of cross-origin resource sharing (in seconds) +:`access-control-allow-headers`: + comma-separated list of allowed HTTP custom headers (used in preflight requests) +:`access-control-expose-headers`: + comma-separated list of allowed HTTP custom headers (used in simple requests) + diff -r 14452b344d19 -r aaf83cc07eed web/application.py --- a/web/application.py Wed Mar 12 16:02:44 2014 +0100 +++ b/web/application.py Fri Mar 14 11:20:53 2014 +0100 @@ -36,7 +36,7 @@ AuthenticationError, NoSelectableObject, BadConnectionId, CW_EVENT_MANAGER) from cubicweb.repoapi import anonymous_cnx -from cubicweb.web import LOGGER, component +from cubicweb.web import LOGGER, component, cors from cubicweb.web import ( StatusResponse, DirectResponse, Redirect, NotFound, LogOut, RemoteCallFailed, InvalidSession, RequestError, PublishException) @@ -415,6 +415,7 @@ content = self.need_login_content(req) return content + def core_handle(self, req, path): """method called by the main publisher to process @@ -440,6 +441,8 @@ try: ### standard processing of the request try: + # apply CORS sanity checks + cors.process_request(req, self.vreg.config) ctrlid, rset = self.url_resolver.process(req, path) try: controller = self.vreg['controllers'].select(ctrlid, req, @@ -448,6 +451,10 @@ raise Unauthorized(req._('not authorized')) req.update_search_state() result = controller.publish(rset=rset) + except cors.CORSPreflight: + # Return directly an empty 200 + req.status_out = 200 + result = '' except StatusResponse as ex: warn('[3.16] StatusResponse is deprecated use req.status_out', DeprecationWarning, stacklevel=2) diff -r 14452b344d19 -r aaf83cc07eed web/cors.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/web/cors.py Fri Mar 14 
11:20:53 2014 +0100 @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +# copyright 2014 Logilab, PARIS + +"""A set of utility functions to handle CORS requests + +Unless specified, all references in this file are related to: + http://www.w3.org/TR/cors + +The provided implementation roughly follows: + http://www.html5rocks.com/static/images/cors_server_flowchart.png + +See also: + https://developer.mozilla.org/en-US/docs/HTTP/Access_control_CORS + +""" + +import urlparse + +from cubicweb.web import LOGGER +info = LOGGER.info + +class CORSFailed(Exception): + """Raised when cross origin resource sharing checks failed""" + + +class CORSPreflight(Exception): + """Raised when cross origin resource sharing checks detects the + request as a valid preflight request""" + + +def process_request(req, config): + """ + Process a request to apply CORS specification algorithms + + Check whether the CORS specification is respected and set corresponding + headers to ensure response complies with the specification. + + In case of non-compliance, no CORS-related header is set. 
+ """ + base_url = urlparse.urlsplit(req.base_url()) + expected_host = '://'.join((base_url.scheme, base_url.netloc)) + if not req.get_header('Origin') or req.get_header('Origin') == expected_host: + # not a CORS request, nothing to do + return + try: + # handle cross origin resource sharing (CORS) + if req.http_method() == 'OPTIONS': + if req.get_header('Access-Control-Request-Method'): + # preflight CORS request + process_preflight(req, config) + else: # Simple CORS or actual request + process_simple(req, config) + except CORSFailed, exc: + info('Cross origin resource sharing failed: %s' % exc) + except CORSPreflight: + info('Cross origin resource sharing: valid Preflight request') + raise + +def process_preflight(req, config): + """cross origin resource sharing (preflight) + Cf http://www.w3.org/TR/cors/#resource-preflight-requests + """ + origin = check_origin(req, config) + allowed_methods = set(config['access-control-allow-methods']) + allowed_headers = set(config['access-control-allow-headers']) + try: + method = req.get_header('Access-Control-Request-Method') + except ValueError: + raise CORSFailed('Access-Control-Request-Method is incorrect') + if method not in allowed_methods: + raise CORSFailed('Method is not allowed') + try: + req.get_header('Access-Control-Request-Headers', ()) + except ValueError: + raise CORSFailed('Access-Control-Request-Headers is incorrect') + req.set_header('Access-Control-Allow-Methods', allowed_methods, raw=False) + req.set_header('Access-Control-Allow-Headers', allowed_headers, raw=False) + + process_common(req, config, origin) + raise CORSPreflight() + +def process_simple(req, config): + """Handle the Simple Cross-Origin Request case + """ + origin = check_origin(req, config) + exposed_headers = config['access-control-expose-headers'] + if exposed_headers: + req.set_header('Access-Control-Expose-Headers', exposed_headers, raw=False) + process_common(req, config, origin) + +def process_common(req, config, origin): + 
req.set_header('Access-Control-Allow-Origin', origin) + # in CW, we always support credential/authentication + req.set_header('Access-Control-Allow-Credentials', 'true') + +def check_origin(req, config): + origin = req.get_header('Origin').lower() + allowed_origins = config.get('access-control-allow-origin') + if not allowed_origins: + raise CORSFailed('access-control-allow-origin is not configured') + if '*' not in allowed_origins and origin not in allowed_origins: + raise CORSFailed('Origin is not allowed') + # bit of sanity check; see "6.3 Security" + myhost = urlparse.urlsplit(req.base_url()).netloc + host = req.get_header('Host') + if host != myhost: + info('cross origin resource sharing detected possible ' + 'DNS rebinding attack Host header != host of base_url: ' + '%s != %s' % (host, myhost)) + raise CORSFailed('Host header and hostname do not match') + # include "Vary: Origin" header (see 6.4) + req.set_header('Vary', 'Origin') + return origin + diff -r 14452b344d19 -r aaf83cc07eed web/http_headers.py --- a/web/http_headers.py Wed Mar 12 16:02:44 2014 +0100 +++ b/web/http_headers.py Fri Mar 14 11:20:53 2014 +0100 @@ -8,6 +8,7 @@ from calendar import timegm import base64 import re +import urlparse def dashCapitalize(s): ''' Capitalize a string, making sure to treat - as a word seperator ''' @@ -388,6 +389,35 @@ raise ValueError('single value required, not %s' % seq) return seq[0] +def parseHTTPMethod(method): + """Ensure a HTTP method is valid according the rfc2616, but extension-method ones""" + method = method.strip() + if method not in ("OPTIONS", "GET", "HEAD", "POST", "PUT", "DELETE", + "TRACE", "CONNECT"): + raise ValueError('Unsupported HTTP method %s' % method) + return method + +def parseAllowOrigin(origin): + """Ensure origin is a valid URL-base stuff, or null""" + if origin == 'null': + return origin + p = urlparse.urlparse(origin) + if p.params or p.query or p.username or p.path not in ('', '/'): + raise ValueError('Incorrect 
Access-Control-Allow-Origin value %s' % origin) + if p.scheme not in ('http', 'https'): + raise ValueError('Unsupported Access-Control-Allow-Origin URL scheme %s' % origin) + if not p.netloc: + raise ValueError('Access-Control-Allow-Origin: host name cannot be unset (%s)' % origin) + return origin + +def parseAllowCreds(cred): + """Can be "true" """ + if cred: + cred = cred.lower() + if cred and cred != 'true': + raise ValueError('Access-Control-Allow-Credentials can only be "true" (%s)' % cred) + return cred + ##### Generation utilities def quoteString(s): return '"%s"' % s.replace('\\', '\\\\').replace('"', '\\"') @@ -1454,6 +1484,12 @@ 'Accept-Charset': (tokenize, listParser(parseAcceptQvalue), dict, addDefaultCharset), 'Accept-Encoding':(tokenize, listParser(parseAcceptQvalue), dict, addDefaultEncoding), 'Accept-Language':(tokenize, listParser(parseAcceptQvalue), dict), + 'Access-Control-Allow-Origin': (last, parseAllowOrigin,), + 'Access-Control-Allow-Credentials': (last, parseAllowCreds,), + 'Access-Control-Allow-Methods': (tokenize, listParser(parseHTTPMethod), list), + 'Access-Control-Request-Method': (parseHTTPMethod, ), + 'Access-Control-Request-Headers': (filterTokens, ), + 'Access-Control-Expose-Headers': (filterTokens, ), 'Authorization': (last, parseAuthorization), 'Cookie':(parseCookie,), 'Expect':(tokenize, listParser(parseExpect), dict), @@ -1465,6 +1501,7 @@ 'If-Range':(parseIfRange,), 'If-Unmodified-Since':(last,parseDateTime), 'Max-Forwards':(last,int), + 'Origin': (last,), # 'Proxy-Authorization':str, # what is "credentials" 'Range':(tokenize, parseRange), 'Referer':(last,str), # TODO: URI object? 
@@ -1477,11 +1514,15 @@ 'Accept-Charset': (iteritems, listGenerator(generateAcceptQvalue),singleHeader), 'Accept-Encoding': (iteritems, removeDefaultEncoding, listGenerator(generateAcceptQvalue),singleHeader), 'Accept-Language': (iteritems, listGenerator(generateAcceptQvalue),singleHeader), + 'Access-Control-Request-Method': (unique, str, singleHeader, ), + 'Access-Control-Expose-Headers': (listGenerator(str), ), + 'Access-Control-Allow-Headers': (listGenerator(str), ), 'Authorization': (generateAuthorization,), # what is "credentials" 'Cookie':(generateCookie,singleHeader), 'Expect':(iteritems, listGenerator(generateExpect), singleHeader), 'From':(unique, str,singleHeader), 'Host':(unique, str,singleHeader), + 'Origin': (unique, str, singleHeader), 'If-Match':(listGenerator(generateStarOrETag), singleHeader), 'If-Modified-Since':(generateDateTime,singleHeader), 'If-None-Match':(listGenerator(generateStarOrETag), singleHeader), diff -r 14452b344d19 -r aaf83cc07eed web/test/unittest_http.py --- a/web/test/unittest_http.py Wed Mar 12 16:02:44 2014 +0100 +++ b/web/test/unittest_http.py Fri Mar 14 11:20:53 2014 +0100 @@ -15,9 +15,13 @@ # # You should have received a copy of the GNU Lesser General Public License along # with CubicWeb. If not, see . 
+ +import contextlib + from logilab.common.testlib import TestCase, unittest_main, tag, Tags from cubicweb.devtools.fake import FakeRequest +from cubicweb.devtools.testlib import CubicWebTC def _test_cache(hin, hout, method='GET'): @@ -290,5 +294,167 @@ self.assertEqual(value, [DATE]) +alloworig = 'access-control-allow-origin' +allowmethods = 'access-control-allow-methods' +allowheaders = 'access-control-allow-headers' +allowcreds = 'access-control-allow-credentials' +exposeheaders = 'access-control-expose-headers' +maxage = 'access-control-max-age' + +requestmethod = 'access-control-request-method' +requestheaders = 'access-control-request-headers' + +class _BaseAccessHeadersTC(CubicWebTC): + + @contextlib.contextmanager + def options(self, **options): + for k, values in options.items(): + self.config.set_option(k, values) + try: + yield + finally: + for k in options: + self.config.set_option(k, '') + def check_no_cors(self, req): + self.assertEqual(None, req.get_response_header(alloworig)) + self.assertEqual(None, req.get_response_header(allowmethods)) + self.assertEqual(None, req.get_response_header(allowheaders)) + self.assertEqual(None, req.get_response_header(allowcreds)) + self.assertEqual(None, req.get_response_header(exposeheaders)) + self.assertEqual(None, req.get_response_header(maxage)) + + +class SimpleAccessHeadersTC(_BaseAccessHeadersTC): + + def test_noaccess(self): + with self.admin_access.web_request() as req: + data = self.app_handle_request(req) + self.check_no_cors(req) + + def test_noorigin(self): + with self.options(**{alloworig: '*'}): + with self.admin_access.web_request() as req: + req = self.request() + data = self.app_handle_request(req) + self.check_no_cors(req) + + def test_origin_noaccess(self): + with self.admin_access.web_request() as req: + req.set_request_header('Origin', 'http://www.cubicweb.org') + data = self.app_handle_request(req) + self.check_no_cors(req) + + def test_origin_noaccess_bad_host(self): + with 
self.options(**{alloworig: '*'}): + with self.admin_access.web_request() as req: + req.set_request_header('Origin', 'http://www.cubicweb.org') + # in these tests, base_url is http://testing.fr/cubicweb/ + req.set_request_header('Host', 'badhost.net') + data = self.app_handle_request(req) + self.check_no_cors(req) + + def test_explicit_origin_noaccess(self): + with self.options(**{alloworig: ['http://www.toto.org', 'http://othersite.fr']}): + with self.admin_access.web_request() as req: + req.set_request_header('Origin', 'http://www.cubicweb.org') + # in these tests, base_url is http://testing.fr/cubicweb/ + req.set_request_header('Host', 'testing.fr') + data = self.app_handle_request(req) + self.check_no_cors(req) + + def test_origin_access(self): + with self.options(**{alloworig: '*'}): + with self.admin_access.web_request() as req: + req.set_request_header('Origin', 'http://www.cubicweb.org') + # in these tests, base_url is http://testing.fr/cubicweb/ + req.set_request_header('Host', 'testing.fr') + data = self.app_handle_request(req) + self.assertEqual('http://www.cubicweb.org', + req.get_response_header(alloworig)) + + def test_explicit_origin_access(self): + with self.options(**{alloworig: ['http://www.cubicweb.org', 'http://othersite.fr']}): + with self.admin_access.web_request() as req: + req.set_request_header('Origin', 'http://www.cubicweb.org') + # in these tests, base_url is http://testing.fr/cubicweb/ + req.set_request_header('Host', 'testing.fr') + data = self.app_handle_request(req) + self.assertEqual('http://www.cubicweb.org', + req.get_response_header(alloworig)) + + def test_origin_access_headers(self): + with self.options(**{alloworig: '*', + exposeheaders: ['ExposeHead1', 'ExposeHead2'], + allowheaders: ['AllowHead1', 'AllowHead2'], + allowmethods: ['GET', 'POST', 'OPTIONS']}): + with self.admin_access.web_request() as req: + req.set_request_header('Origin', 'http://www.cubicweb.org') + # in these tests, base_url is http://testing.fr/cubicweb/ + 
req.set_request_header('Host', 'testing.fr') + data = self.app_handle_request(req) + self.assertEqual('http://www.cubicweb.org', + req.get_response_header(alloworig)) + self.assertEqual("true", + req.get_response_header(allowcreds)) + self.assertEqual(['ExposeHead1', 'ExposeHead2'], + req.get_response_header(exposeheaders)) + self.assertEqual(None, req.get_response_header(allowmethods)) + self.assertEqual(None, req.get_response_header(allowheaders)) + + +class PreflightAccessHeadersTC(_BaseAccessHeadersTC): + + def test_noaccess(self): + with self.admin_access.web_request(method='OPTIONS') as req: + data = self.app_handle_request(req) + self.check_no_cors(req) + + def test_noorigin(self): + with self.options(**{alloworig: '*'}): + with self.admin_access.web_request(method='OPTIONS') as req: + req = self.request() + data = self.app_handle_request(req) + self.check_no_cors(req) + + def test_origin_noaccess(self): + with self.admin_access.web_request(method='OPTIONS') as req: + req.set_request_header('Origin', 'http://www.cubicweb.org') + data = self.app_handle_request(req) + self.check_no_cors(req) + + def test_origin_noaccess_bad_host(self): + with self.options(**{alloworig: '*'}): + with self.admin_access.web_request(method='OPTIONS') as req: + req.set_request_header('Origin', 'http://www.cubicweb.org') + # in these tests, base_url is http://testing.fr/cubicweb/ + req.set_request_header('Host', 'badhost.net') + data = self.app_handle_request(req) + self.check_no_cors(req) + + def test_origin_access(self): + with self.options(**{alloworig: '*', + exposeheaders: ['ExposeHead1', 'ExposeHead2'], + allowheaders: ['AllowHead1', 'AllowHead2'], + allowmethods: ['GET', 'POST', 'OPTIONS']}): + with self.admin_access.web_request(method='OPTIONS') as req: + req.set_request_header('Origin', 'http://www.cubicweb.org') + # in these tests, base_url is http://testing.fr/cubicweb/ + req.set_request_header('Host', 'testing.fr') + req.set_request_header(requestmethod, 'GET') + + data 
= self.app_handle_request(req) + self.assertEqual(200, req.status_out) + self.assertEqual('http://www.cubicweb.org', + req.get_response_header(alloworig)) + self.assertEqual("true", + req.get_response_header(allowcreds)) + self.assertEqual(set(['GET', 'POST', 'OPTIONS']), + req.get_response_header(allowmethods)) + self.assertEqual(set(['AllowHead1', 'AllowHead2']), + req.get_response_header(allowheaders)) + self.assertEqual(None, + req.get_response_header(exposeheaders)) + + if __name__ == '__main__': unittest_main() diff -r 14452b344d19 -r aaf83cc07eed web/webconfig.py --- a/web/webconfig.py Wed Mar 12 16:02:44 2014 +0100 +++ b/web/webconfig.py Fri Mar 14 11:20:53 2014 +0100 @@ -247,6 +247,36 @@ 'help': 'The static data resource directory path.', 'group': 'web', 'level': 2, }), + ('access-control-allow-origin', + {'type' : 'csv', + 'default': (), + 'help':('comma-separated list of allowed origin domains or "*" for any domain'), + 'group': 'web', 'level': 2, + }), + ('access-control-allow-methods', + {'type' : 'csv', + 'default': (), + 'help': ('comma-separated list of allowed HTTP methods'), + 'group': 'web', 'level': 2, + }), + ('access-control-max-age', + {'type' : 'int', + 'default': None, + 'help': ('maximum age of cross-origin resource sharing (in seconds)'), + 'group': 'web', 'level': 2, + }), + ('access-control-expose-headers', + {'type' : 'csv', + 'default': (), + 'help':('comma-separated list of HTTP headers the application may set in the response'), + 'group': 'web', 'level': 2, + }), + ('access-control-allow-headers', + {'type' : 'csv', + 'default': (), + 'help':('comma-separated list of HTTP headers the application declares in response to a preflight request'), + 'group': 'web', 'level': 2, + }), )) def __init__(self, *args, **kwargs):