# HG changeset patch # User Adrien Di Mascio # Date 1331830767 -3600 # Node ID d5b1b75805dd158f12ccc6f283338d78bd25197e # Parent 166e6d5d8e17d0d7c4a7117ba9e57bc0adc0375b [cache] factorize _validate_cache() logic implemented in wsgi and twisted handlers diff -r 166e6d5d8e17 -r d5b1b75805dd devtools/fake.py --- a/devtools/fake.py Thu Mar 15 17:57:40 2012 +0100 +++ b/devtools/fake.py Thu Mar 15 17:59:27 2012 +0100 @@ -79,10 +79,6 @@ def http_method(self): return self._http_method - - def header_if_modified_since(self): - return None - def relative_path(self, includeparams=True): """return the normalized path of the request (ie at least relative to the instance's root, but some other normalization may be needed @@ -114,9 +110,6 @@ return self.headers_out.getRawHeaders(header, [default])[0] return self.headers_out.getHeader(header, default) - def validate_cache(self): - pass - def build_url_params(self, **kwargs): # overriden to get predictable resultts args = [] diff -r 166e6d5d8e17 -r d5b1b75805dd etwist/http.py --- a/etwist/http.py Thu Mar 15 17:57:40 2012 +0100 +++ b/etwist/http.py Thu Mar 15 17:59:27 2012 +0100 @@ -43,19 +43,3 @@ def __repr__(self): return "<%s.%s code=%d>" % (self.__module__, self.__class__.__name__, self._code) - - -def not_modified_response(twisted_request, headers_in): - headers_out = Headers() - - for header in ( - # Required from sec 10.3.5: - 'date', 'etag', 'content-location', 'expires', - 'cache-control', 'vary', - # Others: - 'server', 'proxy-authenticate', 'www-authenticate', 'warning'): - value = headers_in.getRawHeaders(header) - if value is not None: - headers_out.setRawHeaders(header, value) - return HTTPResponse(twisted_request=twisted_request, - headers=headers_out) diff -r 166e6d5d8e17 -r d5b1b75805dd etwist/request.py --- a/etwist/request.py Thu Mar 15 17:57:40 2012 +0100 +++ b/etwist/request.py Thu Mar 15 17:59:27 2012 +0100 @@ -27,7 +27,6 @@ from cubicweb.web.request import CubicWebRequestBase from cubicweb.web.httpcache import 
GMTOFFSET from cubicweb.web.http_headers import Headers -from cubicweb.etwist.http import not_modified_response class CubicWebTwistedRequestAdapter(CubicWebRequestBase): @@ -57,30 +56,3 @@ if not includeparams: path = path.split('?', 1)[0] return path - - def _validate_cache(self): - """raise a `DirectResponse` exception if a cached page along the way - exists and is still usable - """ - if self.get_header('Cache-Control') in ('max-age=0', 'no-cache'): - # Expires header seems to be required by IE7 - self.add_header('Expires', 'Sat, 01 Jan 2000 00:00:00 GMT') - return - # when using both 'Last-Modified' and 'ETag' response headers - # (i.e. using respectively If-Modified-Since and If-None-Match request - # headers, see - # http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3.4 for - # reference - last_modified = self.headers_out.getHeader('last-modified') - if last_modified is not None: - status = self._twreq.setLastModified(last_modified) - if status != http.CACHED: - return - etag = self.headers_out.getRawHeaders('etag') - if etag is not None: - status = self._twreq.setETag(etag[0]) - if status == http.CACHED: - response = not_modified_response(self._twreq, self._headers_in) - raise DirectResponse(response) - # Expires header seems to be required by IE7 - self.add_header('Expires', 'Sat, 01 Jan 2000 00:00:00 GMT') diff -r 166e6d5d8e17 -r d5b1b75805dd web/httpcache.py --- a/web/httpcache.py Thu Mar 15 17:57:40 2012 +0100 +++ b/web/httpcache.py Thu Mar 15 17:59:27 2012 +0100 @@ -147,3 +147,39 @@ viewmod.StartupView.http_cache_manager = MaxAgeHTTPCacheManager viewmod.StartupView.cache_max_age = 60*60*2 # stay in http cache for 2 hours by default + + +### HTTP Cache validator ############################################ + + + +def get_validators(headers_in): + """return a list of http condition validator relevant to this request + """ + result = [] + for header, func in VALIDATORS: + value = headers_in.getHeader(header) + if value is not None: + 
result.append((func, value)) + return result + + +def if_modified_since(ref_date, headers_out): + last_modified = headers_out.getHeader('last-modified') + if last_modified is None: + return True + return ref_date < last_modified + +def if_none_match(tags, headers_out): + etag = headers_out.getHeader('etag') + if etag is None: + return True + return not ((etag in tags) or ('*' in tags)) + +VALIDATORS = [ + ('if-modified-since', if_modified_since), + #('if-unmodified-since', if_unmodified_since), + ('if-none-match', if_none_match), + #('if-modified-since', if_modified_since), +] + diff -r 166e6d5d8e17 -r d5b1b75805dd web/request.py --- a/web/request.py Thu Mar 15 17:57:40 2012 +0100 +++ b/web/request.py Thu Mar 15 17:59:27 2012 +0100 @@ -27,6 +27,7 @@ from calendar import timegm from datetime import date, datetime from urlparse import urlsplit +import httplib from itertools import count from warnings import warn @@ -43,8 +44,8 @@ from cubicweb.view import STRICT_DOCTYPE, TRANSITIONAL_DOCTYPE_NOEXT from cubicweb.web import (INTERNAL_FIELD_VALUE, LOGGER, NothingToEdit, RequestError, StatusResponse) -from cubicweb.web.httpcache import GMTOFFSET -from cubicweb.web.http_headers import Headers, Cookie +from cubicweb.web.httpcache import GMTOFFSET, get_validators +from cubicweb.web.http_headers import Headers, Cookie, parseDateTime _MARKER = object() @@ -750,14 +751,33 @@ return 'view' def validate_cache(self): - """raise a `DirectResponse` exception if a cached page along the way + """raise a `StatusResponse` exception if a cached page along the way exists and is still usable. 
calls the client-dependant implementation of `_validate_cache` """ - self._validate_cache() - if self.http_method() == 'HEAD': - raise StatusResponse(200, '') + modified = True + if self.get_header('Cache-Control') not in ('max-age=0', 'no-cache'): + # Here, we search for any invalid 'not modified' condition + # see http://www.w3.org/Protocols/rfc2616/rfc2616-sec13.html#sec13.3 + validators = get_validators(self._headers_in) + if validators: # if we have no validators, `modified` stays True + modified = any(func(val, self.headers_out) for func, val in validators) + # Forge expected response + if modified: + if 'Expires' not in self.headers_out: + # Expires header seems to be required by IE7 -- Are you sure ? + self.add_header('Expires', 'Sat, 01 Jan 2000 00:00:00 GMT') + if self.http_method() == 'HEAD': + raise StatusResponse(200, '') + # /!\ no raise here: the function returns and we keep processing the request + else: + # overwrite headers_out to forge a brand new not-modified response + self.headers_out = self._forge_cached_headers() + if self.http_method() in ('HEAD', 'GET'): + raise StatusResponse(httplib.NOT_MODIFIED) + else: + raise StatusResponse(httplib.PRECONDITION_FAILED) # abstract methods to override according to the web front-end ############# @@ -765,11 +785,19 @@ """returns 'POST', 'GET', 'HEAD', etc.""" raise NotImplementedError() - def _validate_cache(self): - """raise a `DirectResponse` exception if a cached page along the way - exists and is still usable - """ - raise NotImplementedError() + def _forge_cached_headers(self): + # overwrite headers_out to forge a brand new not-modified response + headers = Headers() + for header in ( + # Required from sec 10.3.5: + 'date', 'etag', 'content-location', 'expires', + 'cache-control', 'vary', + # Others: + 'server', 'proxy-authenticate', 'www-authenticate', 'warning'): + value = self._headers_in.getRawHeaders(header) + if value is not None: + headers.setRawHeaders(header, value) + return headers def relative_path(self, includeparams=True): 
"""return the normalized path of the request (ie at least relative diff -r 166e6d5d8e17 -r d5b1b75805dd web/test/unittest_http.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/web/test/unittest_http.py Thu Mar 15 17:59:27 2012 +0100 @@ -0,0 +1,282 @@ +from logilab.common.testlib import TestCase, unittest_main, tag, Tags + +from cubicweb.web import StatusResponse +from cubicweb.devtools.fake import FakeRequest + + +def _test_cache(hin, hout, method='GET'): + """forge and process a request + + return status code and the request object + + status is None if no cache is involved + """ + # forge request + req = FakeRequest(method=method) + for key, value in hin: + req._headers_in.addRawHeader(key, str(value)) + for key, value in hout: + req.headers_out.addRawHeader(key, str(value)) + # process + status = None + try: + req.validate_cache() + except StatusResponse, ex: + status = ex.status + return status, req + +class HTTPCache(TestCase): + """Check that the http cache logic works as expected + (as far as we understood the RFC) + + """ + tags = TestCase.tags | Tags('http', 'cache') + + + def assertCache(self, expected, status, situation=''): + """simple assert for nicer message""" + if expected != status: + if expected is None: + expected = "MODIFIED" + if status is None: + status = "MODIFIED" + msg = 'expected %r got %r' % (expected, status) + if situation: + msg = "%s - when: %s" % (msg, situation) + self.fail(msg) + + def test_IN_none_OUT_none(self): + #: test that no caching is requested when no data is available + #: on any side + status, req =_test_cache((),()) + self.assertIsNone(status) + + def test_IN_Some_OUT_none(self): + #: test that no caching is requested when no data is available + #: server (origin) side + hin = [('if-modified-since','Sat, 14 Apr 2012 14:39:32 GM'), + ] + status, req = _test_cache(hin, ()) + self.assertIsNone(status) + hin = [('if-none-match','babar/huitre'), + ] + status, req = _test_cache(hin, ()) + self.assertIsNone(status) + hin 
= [('if-modified-since','Sat, 14 Apr 2012 14:39:32 GM'), + ('if-none-match','babar/huitre'), + ] + status, req = _test_cache(hin, ()) + self.assertIsNone(status) + + def test_IN_none_OUT_Some(self): + #: test that no caching is requested when no data is provided + #: by the client + hout = [('last-modified','Sat, 14 Apr 2012 14:39:32 GM'), + ] + status, req = _test_cache((), hout) + self.assertIsNone(status) + hout = [('etag','babar/huitre'), + ] + status, req = _test_cache((), hout) + self.assertIsNone(status) + hout = [('last-modified', 'Sat, 14 Apr 2012 14:39:32 GM'), + ('etag','babar/huitre'), + ] + status, req = _test_cache((), hout) + self.assertIsNone(status) + + @tag('last_modified') + def test_last_modified_newer(self): + #: test the proper behavior of modification date only + # newer + hin = [('if-modified-since', 'Sat, 13 Apr 2012 14:39:32 GM'), + ] + hout = [('last-modified', 'Sat, 14 Apr 2012 14:39:32 GM'), + ] + status, req = _test_cache(hin, hout) + self.assertCache(None, status, 'origin is newer than client') + + @tag('last_modified') + def test_last_modified_older(self): + # older + hin = [('if-modified-since', 'Sat, 15 Apr 2012 14:39:32 GM'), + ] + hout = [('last-modified', 'Sat, 14 Apr 2012 14:39:32 GM'), + ] + status, req = _test_cache(hin, hout) + self.assertCache(304, status, 'origin is older than client') + + @tag('last_modified') + def test_last_modified_same(self): + # same + hin = [('if-modified-since', 'Sat, 14 Apr 2012 14:39:32 GM'), + ] + hout = [('last-modified', 'Sat, 14 Apr 2012 14:39:32 GM'), + ] + status, req = _test_cache(hin, hout) + self.assertCache(304, status, 'origin is equal to client') + + @tag('etag') + def test_etag_mismatch(self): + #: test the proper behavior of etag only + # etag mismatch + hin = [('if-none-match', 'babar'), + ] + hout = [('etag', 'celestine'), + ] + status, req = _test_cache(hin, hout) + self.assertCache(None, status, 'etag mismatch') + + @tag('etag') + def test_etag_match(self): + # etag match + hin 
= [('if-none-match', 'babar'), + ] + hout = [('etag', 'babar'), + ] + status, req = _test_cache(hin, hout) + self.assertCache(304, status, 'etag match') + # etag match in multiple + hin = [('if-none-match', 'loutre'), + ('if-none-match', 'babar'), + ] + hout = [('etag', 'babar'), + ] + status, req = _test_cache(hin, hout) + self.assertCache(304, status, 'etag match in multiple') + # client use "*" as etag + hin = [('if-none-match', '*'), + ] + hout = [('etag', 'babar'), + ] + status, req = _test_cache(hin, hout) + self.assertCache(304, status, 'client use "*" as etag') + + @tag('etag', 'last_modified') + def test_both(self): + #: test the proper behavior of etag and last-modified combined + # both wrong + hin = [('if-none-match', 'babar'), + ('if-modified-since', 'Sat, 14 Apr 2012 14:39:32 GM'), + ] + hout = [('etag', 'loutre'), + ('last-modified', 'Sat, 15 Apr 2012 14:39:32 GM'), + ] + status, req = _test_cache(hin, hout) + self.assertCache(None, status, 'both wrong') + + @tag('etag', 'last_modified') + def test_both_etag_mismatch(self): + # both etag mismatch + hin = [('if-none-match', 'babar'), + ('if-modified-since', 'Sat, 14 Apr 2012 14:39:32 GM'), + ] + hout = [('etag', 'loutre'), + ('last-modified', 'Sat, 13 Apr 2012 14:39:32 GM'), + ] + status, req = _test_cache(hin, hout) + self.assertCache(None, status, 'both but etag mismatch') + + @tag('etag', 'last_modified') + def test_both_but_modified(self): + # both but modified + hin = [('if-none-match', 'babar'), + ('if-modified-since', 'Sat, 14 Apr 2012 14:39:32 GM'), + ] + hout = [('etag', 'babar'), + ('last-modified', 'Sat, 15 Apr 2012 14:39:32 GM'), + ] + status, req = _test_cache(hin, hout) + self.assertCache(None, status, 'both but modified') + + @tag('etag', 'last_modified') + def test_both_ok(self): + # both ok + hin = [('if-none-match', 'babar'), + ('if-modified-since', 'Sat, 14 Apr 2012 14:39:32 GM'), + ] + hout = [('etag', 'babar'), + ('last-modified', 'Sat, 13 Apr 2012 14:39:32 GM'), + ] + status, req = _test_cache(hin, 
hout) + self.assertCache(304, status, 'both ok') + + @tag('etag', 'HEAD') + def test_head_verb(self): + #: check that a 200 (OK) status is properly raised without content on HEAD request + #: This logic does not really belong here :-/ + # modified + hin = [('if-none-match', 'babar'), + ] + hout = [('etag', 'rhino/really-not-babar'), + ] + status, req = _test_cache(hin, hout, method='HEAD') + self.assertCache(200, status, 'modifier HEAD verb') + # not modified + hin = [('if-none-match', 'babar'), + ] + hout = [('etag', 'babar'), + ] + status, req = _test_cache(hin, hout, method='HEAD') + self.assertCache(304, status, 'not modifier HEAD verb') + + @tag('etag', 'POST') + def test_post_verb(self): + # modified + hin = [('if-none-match', 'babar'), + ] + hout = [('etag', 'rhino/really-not-babar'), + ] + status, req = _test_cache(hin, hout, method='POST') + self.assertCache(None, status, 'modifier HEAD verb') + # not modified + hin = [('if-none-match', 'babar'), + ] + hout = [('etag', 'babar'), + ] + status, req = _test_cache(hin, hout, method='POST') + self.assertCache(412, status, 'not modifier HEAD verb') + + @tag('expires') + def test_expires_added(self): + #: Check that Expires header is added: + #: - when the page is modified + #: - when none was already present + hin = [('if-none-match', 'babar'), + ] + hout = [('etag', 'rhino/really-not-babar'), + ] + status, req = _test_cache(hin, hout) + self.assertCache(None, status, 'modifier HEAD verb') + value = req.headers_out.getHeader('expires') + self.assertIsNotNone(value) + + @tag('expires') + def test_expires_not_added(self): + #: Check that Expires header is not added if NOT-MODIFIED + hin = [('if-none-match', 'babar'), + ] + hout = [('etag', 'babar'), + ] + status, req = _test_cache(hin, hout) + self.assertCache(304, status, 'not modifier HEAD verb') + value = req.headers_out.getHeader('expires') + self.assertIsNone(value) + + @tag('expires') + def test_expires_no_overwrite(self): + #: Check that cache does not overwrite 
existing Expires header + hin = [('if-none-match', 'babar'), + ] + DATE = 'Sat, 13 Apr 2012 14:39:32 GM' + hout = [('etag', 'rhino/really-not-babar'), + ('expires', DATE), + ] + status, req = _test_cache(hin, hout) + self.assertCache(None, status, 'not modifier HEAD verb') + value = req.headers_out.getRawHeaders('expires') + self.assertEqual(value, [DATE]) + + +if __name__ == '__main__': + unittest_main() diff -r 166e6d5d8e17 -r d5b1b75805dd wsgi/request.py --- a/wsgi/request.py Thu Mar 15 17:57:40 2012 +0100 +++ b/wsgi/request.py Thu Mar 15 17:59:27 2012 +0100 @@ -150,19 +150,3 @@ postdata = buf.getvalue() buf.close() return postdata - - def _validate_cache(self): - """raise a `DirectResponse` exception if a cached page along the way - exists and is still usable - """ - if self.get_header('Cache-Control') in ('max-age=0', 'no-cache'): - # Expires header seems to be required by IE7 - self.add_header('Expires', 'Sat, 01 Jan 2000 00:00:00 GMT') - return -# try: -# http.checkPreconditions(self._twreq, _PreResponse(self)) -# except http.HTTPError, ex: -# self.info('valid http cache, no actual rendering') -# raise DirectResponse(ex.response) - # Expires header seems to be required by IE7 - self.add_header('Expires', 'Sat, 01 Jan 2000 00:00:00 GMT')