--- a/web/request.py Fri Apr 01 14:25:55 2011 +0200
+++ b/web/request.py Fri Apr 01 14:34:11 2011 +0200
@@ -734,26 +734,14 @@
return None, None
def parse_accept_header(self, header):
- """returns an ordered list of preferred languages"""
+ """returns an ordered list of accepted values"""
+ try:
+ value_parser, value_sort_key = ACCEPT_HEADER_PARSER[header.lower()]
+ except KeyError:
+ value_parser = value_sort_key = None
accepteds = self.get_header(header, '')
- values = []
- for info in accepteds.split(','):
- try:
- value, scores = info.split(';', 1)
- except ValueError:
- value = info
- score = 1.0
- else:
- for score in scores.split(';'):
- try:
- scorekey, scoreval = score.split('=')
- if scorekey == 'q': # XXX 'level'
- score = float(scoreval)
- except ValueError:
- continue
- values.append((score, value))
- values.sort(reverse=True)
- return (value for (score, value) in values)
+ values = _parse_accept_header(accepteds, value_parser, value_sort_key)
+ return (raw_value for (raw_value, parsed_value, score) in values)
def header_if_modified_since(self):
"""If the HTTP header If-modified-since is set, return the equivalent
@@ -858,5 +846,91 @@
self.parse_accept_header('Accept-Language')]
+
+## HTTP-accept parsers / utilities ############################################
+def _mimetype_sort_key(accept_info):
+ """accepted mimetypes must be sorted by :
+
+ 1/ highest score first
+ 2/ most specific mimetype first, e.g. :
+ - 'text/html;level=1' is more specific than 'text/html'
+ - 'text/html' is more specific than 'text/*'
+ - 'text/*' itself more specific than '*/*'
+
+ """
+ raw_value, (media_type, media_subtype, media_type_params), score = accept_info
+ # FIXME: handle '+' in media_subtype ? (should xhtml+xml have a
+ # higher precedence than xml ?)
+ if media_subtype == '*':
+ score -= 0.0001
+ if media_type == '*':
+ score -= 0.0001
+ return 1./score, media_type, media_subtype, 1./(1+len(media_type_params))
+
+def _charset_sort_key(accept_info):
+ """accepted charsets must be sorted by :
+
+ 1/ highest score first
+ 2/ most specific charset first, e.g. :
+ - 'utf-8' is more specific than '*'
+ """
+ raw_value, value, score = accept_info
+ if value == '*':
+ score -= 0.0001
+ return 1./score, value
+
+def _parse_accept_header(raw_header, value_parser=None, value_sort_key=None):
+ """returns an ordered list of accepted values
+
+ returned value is a list of 3-tuples (raw_value, parsed_value, score),
+ ordered by `value_sort_key` (highest score first by default). Exact type
+ of `parsed_value` depends on what `value_parser` returns. If `value_parser`
+ is None, then the raw value, as found in the http header, is used.
+ """
+ if value_sort_key is None:
+ value_sort_key = lambda infos: 1./infos[-1]
+ values = []
+ for info in raw_header.split(','):
+ score = 1.0
+ other_params = {}
+ try:
+ value, infodef = info.split(';', 1)
+ except ValueError:
+ value = info
+ else:
+ for info in infodef.split(';'):
+ try:
+ infokey, infoval = info.split('=')
+ if infokey == 'q': # XXX 'level'
+ score = float(infoval)
+ continue
+ except ValueError:
+ continue
+ other_params[infokey] = infoval
+ parsed_value = value_parser(value, other_params) if value_parser else value
+ values.append( (value.strip(), parsed_value, score) )
+ values.sort(key=value_sort_key)
+ return values
+
+
+def _mimetype_parser(value, other_params):
+ """return a 3-tuple
+ (type, subtype, type_params) corresponding to the mimetype definition
+ e.g. : for 'text/*', `mimetypeinfo` will be ('text', '*', {}), for
+ 'text/html;level=1', `mimetypeinfo` will be ('text', 'html', {'level': '1'})
+ """
+ try:
+ media_type, media_subtype = value.strip().split('/')
+ except ValueError: # safety belt : '/' should always be present
+ media_type = value.strip()
+ media_subtype = '*'
+ return (media_type, media_subtype, other_params)
+
+
+ACCEPT_HEADER_PARSER = {
+ 'accept': (_mimetype_parser, _mimetype_sort_key),
+ 'accept-charset': (None, _charset_sort_key),
+ }
+
from cubicweb import set_log_methods
set_log_methods(CubicWebRequestBase, LOGGER)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/web/test/unittest_request.py Fri Apr 01 14:34:11 2011 +0200
@@ -0,0 +1,69 @@
+"""misc. unittests for utility functions
+"""
+
+from logilab.common.testlib import TestCase, unittest_main
+
+from functools import partial
+
+from cubicweb.web.request import (_parse_accept_header,
+ _mimetype_sort_key, _mimetype_parser, _charset_sort_key)
+
+
+
+class AcceptParserTC(TestCase):
+
+ def test_parse_accept(self):
+ parse_accept_header = partial(_parse_accept_header,
+ value_parser=_mimetype_parser,
+ value_sort_key=_mimetype_sort_key)
+ # compare scores
+ self.assertEqual(parse_accept_header("audio/*;q=0.2, audio/basic"),
+ [( ('audio/basic', ('audio', 'basic', {}), 1.0 ) ),
+ ( ('audio/*', ('audio', '*', {}), 0.2 ) )])
+ self.assertEqual(parse_accept_header("text/plain;q=0.5, text/html, text/x-dvi;q=0.8, text/x-c"),
+ [( ('text/html', ('text', 'html', {}), 1.0 ) ),
+ ( ('text/x-c', ('text', 'x-c', {}), 1.0 ) ),
+ ( ('text/x-dvi', ('text', 'x-dvi', {}), 0.8 ) ),
+ ( ('text/plain', ('text', 'plain', {}), 0.5 ) )])
+ # compare mimetype precedence for a same given score
+ self.assertEqual(parse_accept_header("audio/*, audio/basic"),
+ [( ('audio/basic', ('audio', 'basic', {}), 1.0 ) ),
+ ( ('audio/*', ('audio', '*', {}), 1.0 ) )])
+ self.assertEqual(parse_accept_header("text/*, text/html, text/html;level=1, */*"),
+ [( ('text/html', ('text', 'html', {'level': '1'}), 1.0 ) ),
+ ( ('text/html', ('text', 'html', {}), 1.0 ) ),
+ ( ('text/*', ('text', '*', {}), 1.0 ) ),
+ ( ('*/*', ('*', '*', {}), 1.0 ) )])
+ # free party
+ self.assertEqual(parse_accept_header("text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5"),
+ [( ('text/html', ('text', 'html', {'level': '1'}), 1.0 ) ),
+ ( ('text/html', ('text', 'html', {}), 0.7 ) ),
+ ( ('*/*', ('*', '*', {}), 0.5 ) ),
+ ( ('text/html', ('text', 'html', {'level': '2'}), 0.4 ) ),
+ ( ('text/*', ('text', '*', {}), 0.3 ) )
+ ])
+ # chrome sample header
+ self.assertEqual(parse_accept_header("application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"),
+ [( ('application/xhtml+xml', ('application', 'xhtml+xml', {}), 1.0 ) ),
+ ( ('application/xml', ('application', 'xml', {}), 1.0 ) ),
+ ( ('image/png', ('image', 'png', {}), 1.0 ) ),
+ ( ('text/html', ('text', 'html', {}), 0.9 ) ),
+ ( ('text/plain', ('text', 'plain', {}), 0.8 ) ),
+ ( ('*/*', ('*', '*', {}), 0.5 ) ),
+ ])
+
+ def test_parse_accept_language(self):
+ self.assertEqual(_parse_accept_header('fr,fr-fr;q=0.8,en-us;q=0.5,en;q=0.3'),
+ [('fr', 'fr', 1.0), ('fr-fr', 'fr-fr', 0.8),
+ ('en-us', 'en-us', 0.5), ('en', 'en', 0.3)])
+
+ def test_parse_accept_charset(self):
+ parse_accept_header = partial(_parse_accept_header,
+ value_sort_key=_charset_sort_key)
+ self.assertEqual(parse_accept_header('ISO-8859-1,utf-8;q=0.7,*;q=0.7'),
+ [('ISO-8859-1', 'ISO-8859-1', 1.0),
+ ('utf-8', 'utf-8', 0.7),
+ ('*', '*', 0.7)])
+
+if __name__ == '__main__':
+ unittest_main()