# HG changeset patch
# User Adrien Di Mascio
# Date 1301661251 -7200
# Node ID 93a19c1831aac2e1eab183bb85717565c3c5119c
# Parent 62561ea082d23d2e19ec1ab2b8e5d37324074a58
[http] implement 1587305: provide better implementation of Accept header parsing + tests

diff -r 62561ea082d2 -r 93a19c1831aa web/request.py
--- a/web/request.py	Fri Apr 01 14:25:55 2011 +0200
+++ b/web/request.py	Fri Apr 01 14:34:11 2011 +0200
@@ -734,26 +734,14 @@
         return None, None
 
     def parse_accept_header(self, header):
-        """returns an ordered list of preferred languages"""
+        """returns an iterator on accepted values, most preferred first"""
+        try:
+            value_parser, value_sort_key = ACCEPT_HEADER_PARSER[header.lower()]
+        except KeyError:
+            value_parser = value_sort_key = None
         accepteds = self.get_header(header, '')
-        values = []
-        for info in accepteds.split(','):
-            try:
-                value, scores = info.split(';', 1)
-            except ValueError:
-                value = info
-                score = 1.0
-            else:
-                for score in scores.split(';'):
-                    try:
-                        scorekey, scoreval = score.split('=')
-                        if scorekey == 'q': # XXX 'level'
-                            score = float(scoreval)
-                    except ValueError:
-                        continue
-            values.append((score, value))
-        values.sort(reverse=True)
-        return (value for (score, value) in values)
+        values = _parse_accept_header(accepteds, value_parser, value_sort_key)
+        return (raw_value for (raw_value, parsed_value, score) in values)
 
     def header_if_modified_since(self):
         """If the HTTP header If-modified-since is set, return the equivalent
@@ -858,5 +846,114 @@
                 self.parse_accept_header('Accept-Language')]
 
 
+
+## HTTP-accept parsers / utilities ############################################
+def _mimetype_sort_key(accept_info):
+    """return the sort key of an ``Accept`` header entry
+
+    `accept_info` is a 3-tuple (raw_value, (media_type, media_subtype,
+    media_type_params), score) as built by `_parse_accept_header` when
+    used with `_mimetype_parser`. Accepted mimetypes must be sorted by :
+
+    1/ highest score first
+    2/ most specific mimetype first, e.g. :
+       - 'text/html;level=1' is more specific than 'text/html'
+       - 'text/html' is more specific than 'text/*'
+       - 'text/*' itself is more specific than '*/*'
+
+    """
+    raw_value, (media_type, media_subtype, media_type_params), score = accept_info
+    # FIXME: handle '+' in media_subtype ? (should xhtml+xml have a
+    #        higher precedence than xml ?)
+    # each wildcard slightly lowers the score so that, for a same given
+    # score, wildcard entries come after fully specified ones
+    if media_subtype == '*':
+        score -= 0.0001
+    if media_type == '*':
+        score -= 0.0001
+    # negate the score rather than inverting it : same ordering, but a
+    # null (q=0) adjusted score no longer raises ZeroDivisionError
+    return -score, media_type, media_subtype, 1./(1+len(media_type_params))
+
+def _charset_sort_key(accept_info):
+    """return the sort key of an ``Accept-Charset`` header entry
+
+    `accept_info` is a 3-tuple (raw_value, value, score). Accepted charsets
+    must be sorted by :
+
+    1/ highest score first
+    2/ most specific charset first, e.g. :
+       - 'utf-8' is more specific than '*'
+    """
+    raw_value, value, score = accept_info
+    if value == '*':
+        score -= 0.0001
+    # negated rather than inverted score : q=0 must not break the sort
+    return -score, value
+
+def _parse_accept_header(raw_header, value_parser=None, value_sort_key=None):
+    """return the ordered list of values accepted by an ``Accept*`` header
+
+    :param raw_header: raw header value, e.g. 'text/html;q=0.8, */*;q=0.1'
+    :param value_parser: optional callable taking (value, other_params) and
+      returning the parsed value. If None, the stripped value, as found in
+      the http header, is used
+    :param value_sort_key: optional key function applied to each returned
+      3-tuple. If None, entries are sorted by decreasing score
+
+    returned value is a list of 3-tuples (raw_value, parsed_value, score),
+    most preferred first. Empty items (empty header, trailing comma) are
+    skipped. Items with a null score (q=0) are kept and sorted last.
+    """
+    if value_sort_key is None:
+        value_sort_key = lambda infos: -infos[-1]
+    values = []
+    for info in raw_header.split(','):
+        value, _, infodef = info.partition(';')
+        value = value.strip()
+        if not value:
+            continue
+        score = 1.0
+        other_params = {}
+        for param in infodef.split(';'):
+            try:
+                infokey, infoval = param.split('=', 1)
+            except ValueError: # no '=' : not a `key=value` parameter
+                continue
+            # optional whitespace is allowed around separators, e.g.
+            # 'text/html; q=0.8'
+            infokey, infoval = infokey.strip(), infoval.strip()
+            if infokey.lower() == 'q': # XXX 'level'
+                try:
+                    score = float(infoval)
+                except ValueError: # malformed qvalue : keep current score
+                    pass
+                continue
+            other_params[infokey] = infoval
+        parsed_value = value_parser(value, other_params) if value_parser else value
+        values.append( (value, parsed_value, score) )
+    values.sort(key=value_sort_key)
+    return values
+
+
+def _mimetype_parser(value, other_params):
+    """return a 3-tuple
+    (type, subtype, type_params) corresponding to the mimetype definition
+    e.g. : for 'text/*', `mimetypeinfo` will be ('text', '*', {}), for
+    'text/html;level=1', `mimetypeinfo` will be ('text', 'html', {'level': '1'})
+    """
+    try:
+        media_type, media_subtype = value.strip().split('/')
+    except ValueError: # safety belt : '/' should always be present
+        media_type = value.strip()
+        media_subtype = '*'
+    return (media_type, media_subtype, other_params)
+
+
+ACCEPT_HEADER_PARSER = {
+    'accept': (_mimetype_parser, _mimetype_sort_key),
+    'accept-charset': (None, _charset_sort_key),
+    }
+
 from cubicweb import set_log_methods
 set_log_methods(CubicWebRequestBase, LOGGER)
diff -r 62561ea082d2 -r 93a19c1831aa web/test/unittest_request.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/test/unittest_request.py	Fri Apr 01 14:34:11 2011 +0200
@@ -0,0 +1,88 @@
+"""misc. unittests for utility functions
+"""
+
+from logilab.common.testlib import TestCase, unittest_main
+
+from functools import partial
+
+from cubicweb.web.request import (_parse_accept_header,
+                                  _mimetype_sort_key, _mimetype_parser, _charset_sort_key)
+
+
+
+class AcceptParserTC(TestCase):
+
+    def test_parse_accept(self):
+        parse_accept_header = partial(_parse_accept_header,
+                                      value_parser=_mimetype_parser,
+                                      value_sort_key=_mimetype_sort_key)
+        # compare scores
+        self.assertEqual(parse_accept_header("audio/*;q=0.2, audio/basic"),
+                         [( ('audio/basic', ('audio', 'basic', {}), 1.0 ) ),
+                          ( ('audio/*', ('audio', '*', {}), 0.2 ) )])
+        self.assertEqual(parse_accept_header("text/plain;q=0.5, text/html, text/x-dvi;q=0.8, text/x-c"),
+                         [( ('text/html', ('text', 'html', {}), 1.0 ) ),
+                          ( ('text/x-c', ('text', 'x-c', {}), 1.0 ) ),
+                          ( ('text/x-dvi', ('text', 'x-dvi', {}), 0.8 ) ),
+                          ( ('text/plain', ('text', 'plain', {}), 0.5 ) )])
+        # compare mimetype precedence for a same given score
+        self.assertEqual(parse_accept_header("audio/*, audio/basic"),
+                         [( ('audio/basic', ('audio', 'basic', {}), 1.0 ) ),
+                          ( ('audio/*', ('audio', '*', {}), 1.0 ) )])
+        self.assertEqual(parse_accept_header("text/*, text/html, text/html;level=1, */*"),
+                         [( ('text/html', ('text', 'html', {'level': '1'}), 1.0 ) ),
+                          ( ('text/html', ('text', 'html', {}), 1.0 ) ),
+                          ( ('text/*', ('text', '*', {}), 1.0 ) ),
+                          ( ('*/*', ('*', '*', {}), 1.0 ) )])
+        # free party
+        self.assertEqual(parse_accept_header("text/*;q=0.3, text/html;q=0.7, text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5"),
+                         [( ('text/html', ('text', 'html', {'level': '1'}), 1.0 ) ),
+                          ( ('text/html', ('text', 'html', {}), 0.7 ) ),
+                          ( ('*/*', ('*', '*', {}), 0.5 ) ),
+                          ( ('text/html', ('text', 'html', {'level': '2'}), 0.4 ) ),
+                          ( ('text/*', ('text', '*', {}), 0.3 ) )
+                          ])
+        # chrome sample header
+        self.assertEqual(parse_accept_header("application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5"),
+                         [( ('application/xhtml+xml', ('application', 'xhtml+xml', {}), 1.0 ) ),
+                          ( ('application/xml', ('application', 'xml', {}), 1.0 ) ),
+                          ( ('image/png', ('image', 'png', {}), 1.0 ) ),
+                          ( ('text/html', ('text', 'html', {}), 0.9 ) ),
+                          ( ('text/plain', ('text', 'plain', {}), 0.8 ) ),
+                          ( ('*/*', ('*', '*', {}), 0.5 ) ),
+                          ])
+
+    def test_parse_accept_language(self):
+        self.assertEqual(_parse_accept_header('fr,fr-fr;q=0.8,en-us;q=0.5,en;q=0.3'),
+                         [('fr', 'fr', 1.0), ('fr-fr', 'fr-fr', 0.8),
+                          ('en-us', 'en-us', 0.5), ('en', 'en', 0.3)])
+
+    def test_parse_accept_charset(self):
+        parse_accept_header = partial(_parse_accept_header,
+                                      value_sort_key=_charset_sort_key)
+        self.assertEqual(parse_accept_header('ISO-8859-1,utf-8;q=0.7,*;q=0.7'),
+                         [('ISO-8859-1', 'ISO-8859-1', 1.0),
+                          ('utf-8', 'utf-8', 0.7),
+                          ('*', '*', 0.7)])
+
+    def test_parse_accept_whitespace(self):
+        # optional whitespace around ';' and '=' must not hide the qvalue
+        self.assertEqual(_parse_accept_header('fr; q=0.5, en ;q = 0.8'),
+                         [('en', 'en', 0.8), ('fr', 'fr', 0.5)])
+        parse_accept_header = partial(_parse_accept_header,
+                                      value_sort_key=_charset_sort_key)
+        self.assertEqual(parse_accept_header('*, utf-8'),
+                         [('utf-8', 'utf-8', 1.0), ('*', '*', 1.0)])
+
+    def test_parse_accept_null_score(self):
+        # q=0 must not raise ZeroDivisionError while sorting
+        self.assertEqual(_parse_accept_header('fr;q=0,en'),
+                         [('en', 'en', 1.0), ('fr', 'fr', 0.0)])
+
+    def test_parse_accept_empty(self):
+        self.assertEqual(_parse_accept_header(''), [])
+        self.assertEqual(_parse_accept_header('fr,,en,'),
+                         [('fr', 'fr', 1.0), ('en', 'en', 1.0)])
+
+if __name__ == '__main__':
+    unittest_main()