diff -r 058bb3dc685f -r 0b59724cb3f2 web/http_headers.py --- a/web/http_headers.py Mon Jan 04 18:40:30 2016 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1645 +0,0 @@ -# This file has been extracted from the abandoned TwistedWeb2 project -# http://twistedmatrix.com/trac/wiki/TwistedWeb2 - - -import time -from calendar import timegm -import base64 -import re - -from six import string_types -from six.moves.urllib.parse import urlparse - - -def dashCapitalize(s): - ''' Capitalize a string, making sure to treat - as a word seperator ''' - return '-'.join([ x.capitalize() for x in s.split('-')]) - -# datetime parsing and formatting -weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] -weekdayname_lower = [name.lower() for name in weekdayname] -monthname = [None, - 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', - 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] -monthname_lower = [name and name.lower() for name in monthname] - -# HTTP Header parsing API - -header_case_mapping = {} - -def casemappingify(d): - global header_case_mapping - newd = dict([(key.lower(), key) for key in d]) - header_case_mapping.update(newd) - -def lowerify(d): - return dict([(key.lower(), value) for key, value in d.items()]) - - -class HeaderHandler(object): - """HeaderHandler manages header generating and parsing functions. - """ - HTTPParsers = {} - HTTPGenerators = {} - - def __init__(self, parsers=None, generators=None): - """ - @param parsers: A map of header names to parsing functions. - @type parsers: L{dict} - - @param generators: A map of header names to generating functions. - @type generators: L{dict} - """ - - if parsers: - self.HTTPParsers.update(parsers) - if generators: - self.HTTPGenerators.update(generators) - - def parse(self, name, header): - """ - Parse the given header based on its given name. - - @param name: The header name to parse. - @type name: C{str} - - @param header: A list of unparsed headers. - @type header: C{list} of C{str} - - @return: The return value is the parsed header representation, - it is dependent on the header. See the HTTP Headers document. - """ - parser = self.HTTPParsers.get(name, None) - if parser is None: - raise ValueError("No header parser for header '%s', either add one or use getHeaderRaw." % (name,)) - - try: - for p in parser: - #print "==> Parsing %s: %s(%s)" % (name, repr(p), repr(header)) - header = p(header) - # if isinstance(h, types.GeneratorType): - # h = list(h) - except ValueError as v: - # print v - header = None - - return header - - def generate(self, name, header): - """ - Generate the given header based on its given name. - - @param name: The header name to generate. - @type name: C{str} - - @param header: A parsed header, such as the output of - L{HeaderHandler}.parse. - - @return: C{list} of C{str} each representing a generated HTTP header. - """ - generator = self.HTTPGenerators.get(name, None) - - if generator is None: - # print self.generators - raise ValueError("No header generator for header '%s', either add one or use setHeaderRaw." % (name,)) - - for g in generator: - header = g(header) - - #self._raw_headers[name] = h - return header - - def updateParsers(self, parsers): - """Update en masse the parser maps. - - @param parsers: Map of header names to parser chains. - @type parsers: C{dict} - """ - casemappingify(parsers) - self.HTTPParsers.update(lowerify(parsers)) - - def addParser(self, name, value): - """Add an individual parser chain for the given header. 
- - @param name: Name of the header to add - @type name: C{str} - - @param value: The parser chain - @type value: C{str} - """ - self.updateParsers({name: value}) - - def updateGenerators(self, generators): - """Update en masse the generator maps. - - @param parsers: Map of header names to generator chains. - @type parsers: C{dict} - """ - casemappingify(generators) - self.HTTPGenerators.update(lowerify(generators)) - - def addGenerators(self, name, value): - """Add an individual generator chain for the given header. - - @param name: Name of the header to add - @type name: C{str} - - @param value: The generator chain - @type value: C{str} - """ - self.updateGenerators({name: value}) - - def update(self, parsers, generators): - """Conveniently update parsers and generators all at once. - """ - self.updateParsers(parsers) - self.updateGenerators(generators) - - -DefaultHTTPHandler = HeaderHandler() - - -## HTTP DateTime parser -def parseDateTime(dateString): - """Convert an HTTP date string (one of three formats) to seconds since epoch.""" - parts = dateString.split() - - if not parts[0][0:3].lower() in weekdayname_lower: - # Weekday is stupid. Might have been omitted. - try: - return parseDateTime("Sun, "+dateString) - except ValueError: - # Guess not. - pass - - partlen = len(parts) - if (partlen == 5 or partlen == 6) and parts[1].isdigit(): - # 1st date format: Sun, 06 Nov 1994 08:49:37 GMT - # (Note: "GMT" is literal, not a variable timezone) - # (also handles without "GMT") - # This is the normal format - day = parts[1] - month = parts[2] - year = parts[3] - time = parts[4] - elif (partlen == 3 or partlen == 4) and parts[1].find('-') != -1: - # 2nd date format: Sunday, 06-Nov-94 08:49:37 GMT - # (Note: "GMT" is literal, not a variable timezone) - # (also handles without without "GMT") - # Two digit year, yucko. - day, month, year = parts[1].split('-') - time = parts[2] - year = int(year) - if year < 69: - year = year + 2000 - elif year < 100: - year = year + 1900 - elif len(parts) == 5: - # 3rd date format: Sun Nov 6 08:49:37 1994 - # ANSI C asctime() format. - day = parts[2] - month = parts[1] - year = parts[4] - time = parts[3] - else: - raise ValueError("Unknown datetime format %r" % dateString) - - day = int(day) - month = int(monthname_lower.index(month.lower())) - year = int(year) - hour, min, sec = map(int, time.split(':')) - return int(timegm((year, month, day, hour, min, sec))) - - -##### HTTP tokenizer -class Token(str): - __slots__=[] - tokens = {} - def __new__(self, char): - token = Token.tokens.get(char) - if token is None: - Token.tokens[char] = token = str.__new__(self, char) - return token - - def __repr__(self): - return "Token(%s)" % str.__repr__(self) - - -http_tokens = " \t\"()<>@,;:\\/[]?={}" -http_ctls = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f" - -def tokenize(header, foldCase=True): - """Tokenize a string according to normal HTTP header parsing rules. - - In particular: - - Whitespace is irrelevant and eaten next to special separator tokens. - Its existance (but not amount) is important between character strings. - - Quoted string support including embedded backslashes. - - Case is insignificant (and thus lowercased), except in quoted strings. - (unless foldCase=False) - - Multiple headers are concatenated with ',' - - NOTE: not all headers can be parsed with this function. 
- - Takes a raw header value (list of strings), and - Returns a generator of strings and Token class instances. - """ - tokens = http_tokens - ctls = http_ctls - - string = ",".join(header) - list = [] - start = 0 - cur = 0 - quoted = False - qpair = False - inSpaces = -1 - qstring = None - - for x in string: - if quoted: - if qpair: - qpair = False - qstring = qstring+string[start:cur-1]+x - start = cur+1 - elif x == '\\': - qpair = True - elif x == '"': - quoted = False - yield qstring+string[start:cur] - qstring = None - start = cur+1 - elif x in tokens: - if start != cur: - if foldCase: - yield string[start:cur].lower() - else: - yield string[start:cur] - - start = cur+1 - if x == '"': - quoted = True - qstring = "" - inSpaces = False - elif x in " \t": - if inSpaces is False: - inSpaces = True - else: - inSpaces = -1 - yield Token(x) - elif x in ctls: - raise ValueError("Invalid control character: %d in header" % ord(x)) - else: - if inSpaces is True: - yield Token(' ') - inSpaces = False - - inSpaces = False - cur = cur+1 - - if qpair: - raise ValueError("Missing character after '\\'") - if quoted: - raise ValueError("Missing end quote") - - if start != cur: - if foldCase: - yield string[start:cur].lower() - else: - yield string[start:cur] - -def split(seq, delim): - """The same as str.split but works on arbitrary sequences. - Too bad it's not builtin to python!""" - - cur = [] - for item in seq: - if item == delim: - yield cur - cur = [] - else: - cur.append(item) - yield cur - -# def find(seq, *args): -# """The same as seq.index but returns -1 if not found, instead -# Too bad it's not builtin to python!""" -# try: -# return seq.index(value, *args) -# except ValueError: -# return -1 - - -def filterTokens(seq): - """Filter out instances of Token, leaving only a list of strings. - - Used instead of a more specific parsing method (e.g. splitting on commas) - when only strings are expected, so as to be a little lenient. - - Apache does it this way and has some comments about broken clients which - forget commas (?), so I'm doing it the same way. It shouldn't - hurt anything, in any case. - """ - - l = [] - for x in seq: - if not isinstance(x, Token): - l.append(x) - return l - -##### parser utilities: -def checkSingleToken(tokens): - if len(tokens) != 1: - raise ValueError("Expected single token, not %s." % (tokens,)) - return tokens[0] - -def parseKeyValue(val): - if len(val) == 1: - return val[0], None - elif len(val) == 3 and val[1] == Token('='): - return val[0], val[2] - raise ValueError("Expected key or key=value, but got %s." 
% (val,)) - -def parseArgs(field): - args = split(field, Token(';')) - val = next(args) - args = [parseKeyValue(arg) for arg in args] - return val, args - -def listParser(fun): - """Return a function which applies 'fun' to every element in the - comma-separated list""" - def listParserHelper(tokens): - fields = split(tokens, Token(',')) - for field in fields: - if len(field) != 0: - yield fun(field) - - return listParserHelper - -def last(seq): - """Return seq[-1]""" - return seq[-1] - -def unique(seq): - '''if seq is not a string, check it's a sequence of one element and return it''' - if isinstance(seq, string_types): - return seq - if len(seq) != 1: - raise ValueError('single value required, not %s' % seq) - return seq[0] - -def parseHTTPMethod(method): - """Ensure a HTTP method is valid according the rfc2616, but extension-method ones""" - method = method.strip() - if method not in ("OPTIONS", "GET", "HEAD", "POST", "PUT", "DELETE", - "TRACE", "CONNECT"): - raise ValueError('Unsupported HTTP method %s' % method) - return method - -def parseAllowOrigin(origin): - """Ensure origin is a valid URL-base stuff, or null""" - if origin == 'null': - return origin - p = urlparse(origin) - if p.params or p.query or p.username or p.path not in ('', '/'): - raise ValueError('Incorrect Accept-Control-Allow-Origin value %s' % origin) - if p.scheme not in ('http', 'https'): - raise ValueError('Unsupported Accept-Control-Allow-Origin URL scheme %s' % origin) - if not p.netloc: - raise ValueError('Accept-Control-Allow-Origin: host name cannot be unset (%s)' % origin) - return origin - -def parseAllowCreds(cred): - """Can be "true" """ - if cred: - cred = cred.lower() - if cred and cred != 'true': - raise ValueError('Accept-Control-Allow-Credentials can only be "true" (%s)' % cred) - return cred - -##### Generation utilities -def quoteString(s): - return '"%s"' % s.replace('\\', '\\\\').replace('"', '\\"') - -def listGenerator(fun): - """Return a function which applies 'fun' to every element in - the given list, then joins the result with generateList""" - def listGeneratorHelper(l): - return generateList([fun(e) for e in l]) - - return listGeneratorHelper - -def generateList(seq): - return ", ".join(seq) - -def singleHeader(item): - return [item] - -def generateKeyValues(kvs): - l = [] - # print kvs - for k, v in kvs: - if v is None: - l.append('%s' % k) - else: - l.append('%s=%s' % (k, v)) - return ";".join(l) - -def generateTrueFalse(value): - """ - Return 'true' or 'false' depending on the value. - - * 'true' values are `True`, `1`, `"true"` - * 'false' values are `False`, `0`, `"false"` - - """ - if (value in (True, 1) or - isinstance(value, string_types) and value.lower() == 'true'): - return 'true' - if (value in (False, 0) or - isinstance(value, string_types) and value.lower() == 'false'): - return 'false' - raise ValueError("Invalid true/false header value: %s" % value) - -class MimeType(object): - @classmethod - def fromString(klass, mimeTypeString): - """Generate a MimeType object from the given string. 
- - @param mimeTypeString: The mimetype to parse - - @return: L{MimeType} - """ - return DefaultHTTPHandler.parse('content-type', [mimeTypeString]) - - def __init__(self, mediaType, mediaSubtype, params={}, **kwargs): - """ - @type mediaType: C{str} - - @type mediaSubtype: C{str} - - @type params: C{dict} - """ - self.mediaType = mediaType - self.mediaSubtype = mediaSubtype - self.params = dict(params) - - if kwargs: - self.params.update(kwargs) - - def __eq__(self, other): - if not isinstance(other, MimeType): return NotImplemented - return (self.mediaType == other.mediaType and - self.mediaSubtype == other.mediaSubtype and - self.params == other.params) - - def __ne__(self, other): - return not self.__eq__(other) - - def __repr__(self): - return "MimeType(%r, %r, %r)" % (self.mediaType, self.mediaSubtype, self.params) - - def __hash__(self): - return hash(self.mediaType)^hash(self.mediaSubtype)^hash(tuple(self.params.items())) - -##### Specific header parsers. -def parseAccept(field): - type, args = parseArgs(field) - - if len(type) != 3 or type[1] != Token('/'): - raise ValueError("MIME Type "+str(type)+" invalid.") - - # okay, this spec is screwy. A 'q' parameter is used as the separator - # between MIME parameters and (as yet undefined) additional HTTP - # parameters. - - num = 0 - for arg in args: - if arg[0] == 'q': - mimeparams = tuple(args[0:num]) - params = args[num:] - break - num = num + 1 - else: - mimeparams = tuple(args) - params = [] - - # Default values for parameters: - qval = 1.0 - - # Parse accept parameters: - for param in params: - if param[0] == 'q': - qval = float(param[1]) - else: - # Warn? ignored parameter. - pass - - ret = MimeType(type[0], type[2], mimeparams), qval - return ret - -def parseAcceptQvalue(field): - type, args = parseArgs(field) - - type = checkSingleToken(type) - - qvalue = 1.0 # Default qvalue is 1 - for arg in args: - if arg[0] == 'q': - qvalue = float(arg[1]) - return type, qvalue - -def addDefaultCharset(charsets): - if charsets.get('*') is None and charsets.get('iso-8859-1') is None: - charsets['iso-8859-1'] = 1.0 - return charsets - -def addDefaultEncoding(encodings): - if encodings.get('*') is None and encodings.get('identity') is None: - # RFC doesn't specify a default value for identity, only that it - # "is acceptable" if not mentioned. Thus, give it a very low qvalue. - encodings['identity'] = .0001 - return encodings - - -def parseContentType(header): - # Case folding is disabled for this header, because of use of - # Content-Type: multipart/form-data; boundary=CaSeFuLsTuFf - # So, we need to explicitly .lower() the type/subtype and arg keys. - - type, args = parseArgs(header) - - if len(type) != 3 or type[1] != Token('/'): - raise ValueError("MIME Type "+str(type)+" invalid.") - - args = [(kv[0].lower(), kv[1]) for kv in args] - - return MimeType(type[0].lower(), type[2].lower(), tuple(args)) - -def parseContentMD5(header): - try: - return base64.decodestring(header) - except Exception as e: - raise ValueError(e) - -def parseContentRange(header): - """Parse a content-range header into (kind, start, end, realLength). - - realLength might be None if real length is not known ('*'). 
- start and end might be None if start, end unspecified (for response code 416) - """ - kind, other = header.strip().split() - if kind.lower() != "bytes": - raise ValueError("a range of type %r is not supported") - startend, realLength = other.split("/") - if startend.strip() == '*': - start, end = None, None - else: - start, end = map(int, startend.split("-")) - if realLength == "*": - realLength = None - else: - realLength = int(realLength) - return (kind, start, end, realLength) - -def parseExpect(field): - type, args = parseArgs(field) - - type = parseKeyValue(type) - return (type[0], (lambda *args:args)(type[1], *args)) - -def parseExpires(header): - # """HTTP/1.1 clients and caches MUST treat other invalid date formats, - # especially including the value 0, as in the past (i.e., "already expired").""" - - try: - return parseDateTime(header) - except ValueError: - return 0 - -def parseIfModifiedSince(header): - # Ancient versions of netscape and *current* versions of MSIE send - # If-Modified-Since: Thu, 05 Aug 2004 12:57:27 GMT; length=123 - # which is blantantly RFC-violating and not documented anywhere - # except bug-trackers for web frameworks. - - # So, we'll just strip off everything after a ';'. - return parseDateTime(header.split(';', 1)[0]) - -def parseIfRange(headers): - try: - return ETag.parse(tokenize(headers)) - except ValueError: - return parseDateTime(last(headers)) - -def parseRange(range): - range = list(range) - if len(range) < 3 or range[1] != Token('='): - raise ValueError("Invalid range header format: %s" %(range,)) - - type = range[0] - if type != 'bytes': - raise ValueError("Unknown range unit: %s." % (type,)) - rangeset = split(range[2:], Token(',')) - ranges = [] - - for byterangespec in rangeset: - if len(byterangespec) != 1: - raise ValueError("Invalid range header format: %s" % (range,)) - start, end = byterangespec[0].split('-') - - if not start and not end: - raise ValueError("Invalid range header format: %s" % (range,)) - - if start: - start = int(start) - else: - start = None - - if end: - end = int(end) - else: - end = None - - if start and end and start > end: - raise ValueError("Invalid range header, start > end: %s" % (range,)) - ranges.append((start, end)) - return type, ranges - -def parseRetryAfter(header): - try: - # delta seconds - return time.time() + int(header) - except ValueError: - # or datetime - return parseDateTime(header) - -# WWW-Authenticate and Authorization - -def parseWWWAuthenticate(tokenized): - headers = [] - - tokenList = list(tokenized) - - while tokenList: - scheme = tokenList.pop(0) - challenge = {} - last = None - kvChallenge = False - - while tokenList: - token = tokenList.pop(0) - if token == Token('='): - kvChallenge = True - challenge[last] = tokenList.pop(0) - last = None - - elif token == Token(','): - if kvChallenge: - if len(tokenList) > 1 and tokenList[1] != Token('='): - break - - else: - break - - else: - last = token - - if last and scheme and not challenge and not kvChallenge: - challenge = last - last = None - - headers.append((scheme, challenge)) - - if last and last not in (Token('='), Token(',')): - if headers[-1] == (scheme, challenge): - scheme = last - challenge = {} - headers.append((scheme, challenge)) - - return headers - -def parseAuthorization(header): - scheme, rest = header.split(' ', 1) - # this header isn't tokenized because it may eat characters - # in the unquoted base64 encoded credentials - return scheme.lower(), rest - -#### Header generators -def generateAccept(accept): - mimeType, q = 
accept - - out ="%s/%s"%(mimeType.mediaType, mimeType.mediaSubtype) - if mimeType.params: - out+=';'+generateKeyValues(mimeType.params.items()) - - if q != 1.0: - out+=(';q=%.3f' % (q,)).rstrip('0').rstrip('.') - - return out - -def removeDefaultEncoding(seq): - for item in seq: - if item[0] != 'identity' or item[1] != .0001: - yield item - -def generateAcceptQvalue(keyvalue): - if keyvalue[1] == 1.0: - return "%s" % keyvalue[0:1] - else: - return ("%s;q=%.3f" % keyvalue).rstrip('0').rstrip('.') - -def parseCacheControl(kv): - k, v = parseKeyValue(kv) - if k == 'max-age' or k == 'min-fresh' or k == 's-maxage': - # Required integer argument - if v is None: - v = 0 - else: - v = int(v) - elif k == 'max-stale': - # Optional integer argument - if v is not None: - v = int(v) - elif k == 'private' or k == 'no-cache': - # Optional list argument - if v is not None: - v = [field.strip().lower() for field in v.split(',')] - return k, v - -def generateCacheControl(args): - k, v = args - if v is None: - return str(k) - else: - if k == 'no-cache' or k == 'private': - # quoted list of values - v = quoteString(generateList( - [header_case_mapping.get(name) or dashCapitalize(name) for name in v])) - return '%s=%s' % (k, v) - -def generateContentRange(tup): - """tup is (type, start, end, len) - len can be None. - """ - type, start, end, len = tup - if len == None: - len = '*' - else: - len = int(len) - if start == None and end == None: - startend = '*' - else: - startend = '%d-%d' % (start, end) - - return '%s %s/%s' % (type, startend, len) - -def generateDateTime(secSinceEpoch): - """Convert seconds since epoch to HTTP datetime string.""" - # take care gmtime doesn't handle time before epoch (crash on windows at least) - year, month, day, hh, mm, ss, wd, y, z = time.gmtime(max(0, secSinceEpoch)) - s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( - weekdayname[wd], - day, monthname[month], year, - hh, mm, ss) - return s - -def generateExpect(item): - if item[1][0] is None: - out = '%s' % (item[0],) - else: - out = '%s=%s' % (item[0], item[1][0]) - if len(item[1]) > 1: - out += ';'+generateKeyValues(item[1][1:]) - return out - -def generateRange(range): - def noneOr(s): - if s is None: - return '' - return s - - type, ranges = range - - if type != 'bytes': - raise ValueError("Unknown range unit: "+type+".") - - return (type+'='+ - ','.join(['%s-%s' % (noneOr(startend[0]), noneOr(startend[1])) - for startend in ranges])) - -def generateRetryAfter(when): - # always generate delta seconds format - return str(int(when - time.time())) - -def generateContentType(mimeType): - out = "%s/%s" % (mimeType.mediaType, mimeType.mediaSubtype) - if mimeType.params: - out += ';' + generateKeyValues(mimeType.params.items()) - return out - -def generateIfRange(dateOrETag): - if isinstance(dateOrETag, ETag): - return dateOrETag.generate() - else: - return generateDateTime(dateOrETag) - -# WWW-Authenticate and Authorization - -def generateWWWAuthenticate(headers): - _generated = [] - for seq in headers: - scheme, challenge = seq[0], seq[1] - - # If we're going to parse out to something other than a dict - # we need to be able to generate from something other than a dict - - try: - l = [] - for k, v in dict(challenge).items(): - l.append("%s=%s" % (k, quoteString(v))) - - _generated.append("%s %s" % (scheme, ", ".join(l))) - except ValueError: - _generated.append("%s %s" % (scheme, challenge)) - - return _generated - -def generateAuthorization(seq): - return [' '.join(str(v) for v in seq)] - - -#### -class ETag(object): - def 
__init__(self, tag, weak=False): - self.tag = str(tag) - self.weak = weak - - def match(self, other, strongCompare): - # Sec 13.3. - # The strong comparison function: in order to be considered equal, both - # validators MUST be identical in every way, and both MUST NOT be weak. - # - # The weak comparison function: in order to be considered equal, both - # validators MUST be identical in every way, but either or both of - # them MAY be tagged as "weak" without affecting the result. - - if not isinstance(other, ETag) or other.tag != self.tag: - return False - - if strongCompare and (other.weak or self.weak): - return False - return True - - def __eq__(self, other): - return isinstance(other, ETag) and other.tag == self.tag and other.weak == self.weak - - def __ne__(self, other): - return not self.__eq__(other) - - def __repr__(self): - return "Etag(%r, weak=%r)" % (self.tag, self.weak) - - def parse(tokens): - tokens = tuple(tokens) - if len(tokens) == 1 and not isinstance(tokens[0], Token): - return ETag(tokens[0]) - - if(len(tokens) == 3 and tokens[0] == "w" - and tokens[1] == Token('/')): - return ETag(tokens[2], weak=True) - - raise ValueError("Invalid ETag.") - - parse = staticmethod(parse) - - def generate(self): - if self.weak: - return 'W/'+quoteString(self.tag) - else: - return quoteString(self.tag) - -def parseStarOrETag(tokens): - tokens = tuple(tokens) - if tokens == ('*',): - return '*' - else: - return ETag.parse(tokens) - -def generateStarOrETag(etag): - if etag == '*': - return etag - else: - return etag.generate() - -#### Cookies. Blech! -class Cookie(object): - # __slots__ = ['name', 'value', 'path', 'domain', 'ports', 'expires', - # 'discard', 'secure', 'httponly', 'comment', 'commenturl', - # 'version'] - - def __init__(self, name, value, path=None, domain=None, ports=None, - expires=None, discard=False, secure=False, httponly=False, - comment=None, commenturl=None, version=0): - self.name = name - self.value = value - self.path = path - self.domain = domain - self.ports = ports - self.expires = expires - self.discard = discard - self.secure = secure - self.httponly = httponly - self.comment = comment - self.commenturl = commenturl - self.version = version - - def __repr__(self): - s = "Cookie(%r=%r" % (self.name, self.value) - if self.path is not None: s+=", path=%r" % (self.path,) - if self.domain is not None: s+=", domain=%r" % (self.domain,) - if self.ports is not None: s+=", ports=%r" % (self.ports,) - if self.expires is not None: s+=", expires=%r" % (self.expires,) - if self.secure: s+=", secure" - if self.httponly: s+=", HttpOnly" - if self.comment is not None: s+=", comment=%r" % (self.comment,) - if self.commenturl is not None: s+=", commenturl=%r" % (self.commenturl,) - if self.version != 0: s+=", version=%r" % (self.version,) - s+=")" - return s - - def __eq__(self, other): - return (isinstance(other, Cookie) and - other.path == self.path and - other.domain == self.domain and - other.ports == self.ports and - other.expires == self.expires and - other.secure == self.secure and - other.comment == self.comment and - other.commenturl == self.commenturl and - other.version == self.version) - - def __ne__(self, other): - return not self.__eq__(other) - - -def parseCookie(headers): - """Bleargh, the cookie spec sucks. - This surely needs interoperability testing. 
- There are two specs that are supported: - Version 0) http://wp.netscape.com/newsref/std/cookie_spec.html - Version 1) http://www.faqs.org/rfcs/rfc2965.html - """ - - cookies = [] - # There can't really be multiple cookie headers according to RFC, because - # if multiple headers are allowed, they must be joinable with ",". - # Neither new RFC2965 cookies nor old netscape cookies are. - - header = ';'.join(headers) - if header[0:8].lower() == "$version": - # RFC2965 cookie - h = tokenize([header], foldCase=False) - r_cookies = split(h, Token(',')) - for r_cookie in r_cookies: - last_cookie = None - rr_cookies = split(r_cookie, Token(';')) - for cookie in rr_cookies: - nameval = tuple(split(cookie, Token('='))) - if len(nameval) == 2: - (name,), (value,) = nameval - else: - (name,), = nameval - value = None - - name = name.lower() - if name == '$version': - continue - if name[0] == '$': - if last_cookie is not None: - if name == '$path': - last_cookie.path = value - elif name == '$domain': - last_cookie.domain = value - elif name == '$port': - if value is None: - last_cookie.ports = () - else: - last_cookie.ports = tuple([int(s) for s in value.split(',')]) - else: - last_cookie = Cookie(name, value, version=1) - cookies.append(last_cookie) - else: - # Oldstyle cookies don't do quoted strings or anything sensible. - # All characters are valid for names except ';' and '=', and all - # characters are valid for values except ';'. Spaces are stripped, - # however. - r_cookies = header.split(';') - for r_cookie in r_cookies: - name, value = r_cookie.split('=', 1) - name = name.strip(' \t') - value = value.strip(' \t') - - cookies.append(Cookie(name, value)) - - return cookies - -cookie_validname = "[^"+re.escape(http_tokens+http_ctls)+"]*$" -cookie_validname_re = re.compile(cookie_validname) -cookie_validvalue = cookie_validname+'|"([^"]|\\\\")*"$' -cookie_validvalue_re = re.compile(cookie_validvalue) - -def generateCookie(cookies): - # There's a fundamental problem with the two cookie specifications. - # They both use the "Cookie" header, and the RFC Cookie header only allows - # one version to be specified. Thus, when you have a collection of V0 and - # V1 cookies, you have to either send them all as V0 or send them all as - # V1. - - # I choose to send them all as V1. - - # You might think converting a V0 cookie to a V1 cookie would be lossless, - # but you'd be wrong. If you do the conversion, and a V0 parser tries to - # read the cookie, it will see a modified form of the cookie, in cases - # where quotes must be added to conform to proper V1 syntax. - # (as a real example: "Cookie: cartcontents=oid:94680,qty:1,auto:0,esp:y") - - # However, that is what we will do, anyways. It has a high probability of - # breaking applications that only handle oldstyle cookies, where some other - # application set a newstyle cookie that is applicable over for site - # (or host), AND where the oldstyle cookie uses a value which is invalid - # syntax in a newstyle cookie. - - # Also, the cookie name *cannot* be quoted in V1, so some cookies just - # cannot be converted at all. (e.g. "Cookie: phpAds_capAd[32]=2"). These - # are just dicarded during conversion. - - # As this is an unsolvable problem, I will pretend I can just say - # OH WELL, don't do that, or else upgrade your old applications to have - # newstyle cookie parsers. - - # I will note offhandedly that there are *many* sites which send V0 cookies - # that are not valid V1 cookie syntax. About 20% for my cookies file. 
- # However, they do not generally mix them with V1 cookies, so this isn't - # an issue, at least right now. I have not tested to see how many of those - # webapps support RFC2965 V1 cookies. I suspect not many. - - max_version = max([cookie.version for cookie in cookies]) - - if max_version == 0: - # no quoting or anything. - return ';'.join(["%s=%s" % (cookie.name, cookie.value) for cookie in cookies]) - else: - str_cookies = ['$Version="1"'] - for cookie in cookies: - if cookie.version == 0: - # Version 0 cookie: we make sure the name and value are valid - # V1 syntax. - - # If they are, we use them as is. This means in *most* cases, - # the cookie will look literally the same on output as it did - # on input. - # If it isn't a valid name, ignore the cookie. - # If it isn't a valid value, quote it and hope for the best on - # the other side. - - if cookie_validname_re.match(cookie.name) is None: - continue - - value = cookie.value - if cookie_validvalue_re.match(cookie.value) is None: - value = quoteString(value) - - str_cookies.append("%s=%s" % (cookie.name, value)) - else: - # V1 cookie, nice and easy - str_cookies.append("%s=%s" % (cookie.name, quoteString(cookie.value))) - - if cookie.path: - str_cookies.append("$Path=%s" % quoteString(cookie.path)) - if cookie.domain: - str_cookies.append("$Domain=%s" % quoteString(cookie.domain)) - if cookie.ports is not None: - if len(cookie.ports) == 0: - str_cookies.append("$Port") - else: - str_cookies.append("$Port=%s" % quoteString(",".join([str(x) for x in cookie.ports]))) - return ';'.join(str_cookies) - -def parseSetCookie(headers): - setCookies = [] - for header in headers: - try: - parts = header.split(';') - l = [] - - for part in parts: - namevalue = part.split('=',1) - if len(namevalue) == 1: - name = namevalue[0] - value = None - else: - name, value = namevalue - value = value.strip(' \t') - - name = name.strip(' \t') - - l.append((name, value)) - - setCookies.append(makeCookieFromList(l, True)) - except ValueError: - # If we can't parse one Set-Cookie, ignore it, - # but not the rest of Set-Cookies. - pass - return setCookies - -def parseSetCookie2(toks): - outCookies = [] - for cookie in [[parseKeyValue(x) for x in split(y, Token(';'))] - for y in split(toks, Token(','))]: - try: - outCookies.append(makeCookieFromList(cookie, False)) - except ValueError: - # Again, if we can't handle one cookie -- ignore it. - pass - return outCookies - -def makeCookieFromList(tup, netscapeFormat): - name, value = tup[0] - if name is None or value is None: - raise ValueError("Cookie has missing name or value") - if name.startswith("$"): - raise ValueError("Invalid cookie name: %r, starts with '$'." 
% name) - cookie = Cookie(name, value) - hadMaxAge = False - - for name, value in tup[1:]: - name = name.lower() - - if value is None: - if name in ("discard", "secure"): - # Boolean attrs - value = True - elif name != "port": - # Can be either boolean or explicit - continue - - if name in ("comment", "commenturl", "discard", "domain", "path", "secure"): - # simple cases - setattr(cookie, name, value) - elif name == "expires" and not hadMaxAge: - if netscapeFormat and value[0] == '"' and value[-1] == '"': - value = value[1:-1] - cookie.expires = parseDateTime(value) - elif name == "max-age": - hadMaxAge = True - cookie.expires = int(value) + time.time() - elif name == "port": - if value is None: - cookie.ports = () - else: - if netscapeFormat and value[0] == '"' and value[-1] == '"': - value = value[1:-1] - cookie.ports = tuple([int(s) for s in value.split(',')]) - elif name == "version": - cookie.version = int(value) - - return cookie - - -def generateSetCookie(cookies): - setCookies = [] - for cookie in cookies: - out = ["%s=%s" % (cookie.name, cookie.value)] - if cookie.expires: - out.append("expires=%s" % generateDateTime(cookie.expires)) - if cookie.path: - out.append("path=%s" % cookie.path) - if cookie.domain: - out.append("domain=%s" % cookie.domain) - if cookie.secure: - out.append("secure") - if cookie.httponly: - out.append("HttpOnly") - - setCookies.append('; '.join(out)) - return setCookies - -def generateSetCookie2(cookies): - setCookies = [] - for cookie in cookies: - out = ["%s=%s" % (cookie.name, quoteString(cookie.value))] - if cookie.comment: - out.append("Comment=%s" % quoteString(cookie.comment)) - if cookie.commenturl: - out.append("CommentURL=%s" % quoteString(cookie.commenturl)) - if cookie.discard: - out.append("Discard") - if cookie.domain: - out.append("Domain=%s" % quoteString(cookie.domain)) - if cookie.expires: - out.append("Max-Age=%s" % (cookie.expires - time.time())) - if cookie.path: - out.append("Path=%s" % quoteString(cookie.path)) - if cookie.ports is not None: - if len(cookie.ports) == 0: - out.append("Port") - else: - out.append("Port=%s" % quoteString(",".join([str(x) for x in cookie.ports]))) - if cookie.secure: - out.append("Secure") - if cookie.httponly: - out.append("HttpOnly") - out.append('Version="1"') - setCookies.append('; '.join(out)) - return setCookies - -def parseDepth(depth): - if depth not in ("0", "1", "infinity"): - raise ValueError("Invalid depth header value: %s" % (depth,)) - return depth - -def parseOverWrite(overwrite): - if overwrite == "F": - return False - elif overwrite == "T": - return True - raise ValueError("Invalid overwrite header value: %s" % (overwrite,)) - -def generateOverWrite(overwrite): - if overwrite: - return "T" - else: - return "F" - -##### Random stuff that looks useful. -# def sortMimeQuality(s): -# def sorter(item1, item2): -# if item1[0] == '*': -# if item2[0] == '*': -# return 0 - - -# def sortQuality(s): -# def sorter(item1, item2): -# if item1[1] < item2[1]: -# return -1 -# if item1[1] < item2[1]: -# return 1 -# if item1[0] == item2[0]: -# return 0 - - -# def getMimeQuality(mimeType, accepts): -# type, args = parseArgs(mimeType) -# type = type.split(Token('/')) -# if len(type) != 2: -# raise ValueError, "MIME Type "+s+" invalid." 
- -# for accept in accepts: -# accept, acceptQual = accept -# acceptType = accept[0:1] -# acceptArgs = accept[2] - -# if ((acceptType == type or acceptType == (type[0],'*') or acceptType==('*','*')) and -# (args == acceptArgs or len(acceptArgs) == 0)): -# return acceptQual - -# def getQuality(type, accepts): -# qual = accepts.get(type) -# if qual is not None: -# return qual - -# return accepts.get('*') - -# Headers object -class __RecalcNeeded(object): - def __repr__(self): - return "" - -_RecalcNeeded = __RecalcNeeded() - -class Headers(object): - """This class stores the HTTP headers as both a parsed representation and - the raw string representation. It converts between the two on demand.""" - - def __init__(self, headers=None, rawHeaders=None, handler=DefaultHTTPHandler): - self._raw_headers = {} - self._headers = {} - self.handler = handler - if headers is not None: - for key, value in headers.items(): - self.setHeader(key, value) - if rawHeaders is not None: - for key, value in rawHeaders.items(): - self.setRawHeaders(key, value) - - def _setRawHeaders(self, headers): - self._raw_headers = headers - self._headers = {} - - def _toParsed(self, name): - r = self._raw_headers.get(name, None) - h = self.handler.parse(name, r) - if h is not None: - self._headers[name] = h - return h - - def _toRaw(self, name): - h = self._headers.get(name, None) - r = self.handler.generate(name, h) - if r is not None: - assert isinstance(r, list) - for v in r: - assert isinstance(v, str) - self._raw_headers[name] = r - return r - - def __contains__(self, name): - """Does a header with the given name exist?""" - return name.lower() in self._raw_headers - - hasHeader = __contains__ - - def getRawHeaders(self, name, default=None): - """Returns a list of headers matching the given name as the raw string given.""" - - name = name.lower() - raw_header = self._raw_headers.get(name, default) - if raw_header is not _RecalcNeeded: - return raw_header - - return self._toRaw(name) - - def getHeader(self, name, default=None): - """Returns the parsed representation of the given header. - The exact form of the return value depends on the header in question. - - If no parser for the header exists, raise ValueError. - - If the header doesn't exist, return default (or None if not specified) - """ - name = name.lower() - parsed = self._headers.get(name, default) - if parsed is not _RecalcNeeded: - return parsed - return self._toParsed(name) - - def setRawHeaders(self, name, value): - """Sets the raw representation of the given header. - Value should be a list of strings, each being one header of the - given name. - """ - assert isinstance(value, list) - for v in value: - assert isinstance(v, str) - name = name.lower() - self._raw_headers[name] = value - self._headers[name] = _RecalcNeeded - - def setHeader(self, name, value): - """Sets the parsed representation of the given header. - Value should be a list of objects whose exact form depends - on the header in question. - """ - name = name.lower() - self._raw_headers[name] = _RecalcNeeded - self._headers[name] = value - - def addRawHeader(self, name, value): - """ - Add a raw value to a header that may or may not already exist. - If it exists, add it as a separate header to output; do not - replace anything. 
- """ - name = name.lower() - raw_header = self._raw_headers.get(name) - if raw_header is None: - # No header yet - raw_header = [] - self._raw_headers[name] = raw_header - elif raw_header is _RecalcNeeded: - raw_header = self._toRaw(name) - - raw_header.append(value) - self._headers[name] = _RecalcNeeded - - def addHeader(self, name, value): - """ - Add a parsed representatoin to a header that may or may not already exist. - If it exists, add it as a separate header to output; do not - replace anything. - """ - name = name.lower() - header = self._headers.get(name) - if header is None: - # No header yet - header = [] - self._headers[name] = header - elif header is _RecalcNeeded: - header = self._toParsed(name) - header.append(value) - self._raw_headers[name] = _RecalcNeeded - - def removeHeader(self, name): - """Removes the header named.""" - name = name.lower() - if name in self._raw_headers: - del self._raw_headers[name] - del self._headers[name] - - def __repr__(self): - return ''% (self._raw_headers, self._headers) - - def canonicalNameCaps(self, name): - """Return the name with the canonical capitalization, if known, - otherwise, Caps-After-Dashes""" - return header_case_mapping.get(name) or dashCapitalize(name) - - def getAllRawHeaders(self): - """Return an iterator of key, value pairs of all headers - contained in this object, as strings. The keys are capitalized - in canonical capitalization.""" - for k, v in self._raw_headers.items(): - if v is _RecalcNeeded: - v = self._toRaw(k) - yield self.canonicalNameCaps(k), v - - def makeImmutable(self): - """Make this header set immutable. All mutating operations will - raise an exception.""" - self.setHeader = self.setRawHeaders = self.removeHeader = self._mutateRaise - - def _mutateRaise(self, *args): - raise AttributeError("This header object is immutable as the headers have already been sent.") - - -"""The following dicts are all mappings of header to list of operations - to perform. The first operation should generally be 'tokenize' if the - header can be parsed according to the normal tokenization rules. If - it cannot, generally the first thing you want to do is take only the - last instance of the header (in case it was sent multiple times, which - is strictly an error, but we're nice.). 
- """ - -iteritems = lambda x: x.items() - - -parser_general_headers = { - 'Cache-Control': (tokenize, listParser(parseCacheControl), dict), - 'Connection': (tokenize, filterTokens), - 'Date': (last, parseDateTime), -# 'Pragma':tokenize -# 'Trailer':tokenize - 'Transfer-Encoding': (tokenize, filterTokens), -# 'Upgrade':tokenize -# 'Via':tokenize, stripComment -# 'Warning':tokenize -} - -generator_general_headers = { - 'Cache-Control': (iteritems, listGenerator(generateCacheControl), singleHeader), - 'Connection': (generateList, singleHeader), - 'Date': (generateDateTime, singleHeader), -# 'Pragma': -# 'Trailer': - 'Transfer-Encoding': (generateList, singleHeader), -# 'Upgrade': -# 'Via': -# 'Warning': -} - -parser_request_headers = { - 'Accept': (tokenize, listParser(parseAccept), dict), - 'Accept-Charset': (tokenize, listParser(parseAcceptQvalue), dict, addDefaultCharset), - 'Accept-Encoding': (tokenize, listParser(parseAcceptQvalue), dict, addDefaultEncoding), - 'Accept-Language': (tokenize, listParser(parseAcceptQvalue), dict), - 'Access-Control-Request-Method': (parseHTTPMethod, ), - 'Access-Control-Request-Headers': (filterTokens, ), - 'Authorization': (last, parseAuthorization), - 'Cookie': (parseCookie,), - 'Expect': (tokenize, listParser(parseExpect), dict), - 'Origin': (last,), - 'From': (last,), - 'Host': (last,), - 'If-Match': (tokenize, listParser(parseStarOrETag), list), - 'If-Modified-Since': (last, parseIfModifiedSince), - 'If-None-Match': (tokenize, listParser(parseStarOrETag), list), - 'If-Range': (parseIfRange,), - 'If-Unmodified-Since': (last, parseDateTime), - 'Max-Forwards': (last, int), -# 'Proxy-Authorization':str, # what is "credentials" - 'Range': (tokenize, parseRange), - 'Referer': (last, str), # TODO: URI object? - 'TE': (tokenize, listParser(parseAcceptQvalue), dict), - 'User-Agent': (last, str), -} - -generator_request_headers = { - 'Accept': (iteritems, listGenerator(generateAccept), singleHeader), - 'Accept-Charset': (iteritems, listGenerator(generateAcceptQvalue), singleHeader), - 'Accept-Encoding': (iteritems, removeDefaultEncoding, - listGenerator(generateAcceptQvalue), singleHeader), - 'Accept-Language': (iteritems, listGenerator(generateAcceptQvalue), singleHeader), - 'Access-Control-Request-Method': (unique, str, singleHeader, ), - 'Authorization': (generateAuthorization,), # what is "credentials" - 'Cookie': (generateCookie, singleHeader), - 'Expect': (iteritems, listGenerator(generateExpect), singleHeader), - 'From': (unique, str, singleHeader), - 'Host': (unique, str, singleHeader), - 'If-Match': (listGenerator(generateStarOrETag), singleHeader), - 'If-Modified-Since': (generateDateTime, singleHeader), - 'If-None-Match': (listGenerator(generateStarOrETag), singleHeader), - 'If-Range': (generateIfRange, singleHeader), - 'If-Unmodified-Since': (generateDateTime, singleHeader), - 'Max-Forwards': (unique, str, singleHeader), - 'Origin': (unique, str, singleHeader), -# 'Proxy-Authorization':str, # what is "credentials" - 'Range': (generateRange, singleHeader), - 'Referer': (unique, str, singleHeader), - 'TE': (iteritems, listGenerator(generateAcceptQvalue), singleHeader), - 'User-Agent': (unique, str, singleHeader), -} - -parser_response_headers = { - 'Accept-Ranges': (tokenize, filterTokens), - 'Access-Control-Allow-Origin': (last, parseAllowOrigin,), - 'Access-Control-Allow-Credentials': (last, parseAllowCreds,), - 'Access-Control-Allow-Methods': (tokenize, listParser(parseHTTPMethod), list), - 'Access-Control-Allow-Headers': (listGenerator(str), ), - 
'Access-Control-Expose-Headers': (filterTokens, ), - 'Age': (last, int), - 'ETag': (tokenize, ETag.parse), - 'Location': (last,), # TODO: URI object? -# 'Proxy-Authenticate' - 'Retry-After': (last, parseRetryAfter), - 'Server': (last,), - 'Set-Cookie': (parseSetCookie,), - 'Set-Cookie2': (tokenize, parseSetCookie2), - 'Vary': (tokenize, filterTokens), - 'WWW-Authenticate': (lambda h: tokenize(h, foldCase=False), - parseWWWAuthenticate,) -} - -generator_response_headers = { - 'Accept-Ranges': (generateList, singleHeader), - 'Access-Control-Allow-Origin': (unique, str, singleHeader), - 'Access-Control-Allow-Credentials': (generateTrueFalse, singleHeader), - 'Access-Control-Allow-Headers': (set, generateList, singleHeader), - 'Access-Control-Allow-Methods': (set, generateList, singleHeader), - 'Access-Control-Expose-Headers': (set, generateList, singleHeader), - 'Age': (unique, str, singleHeader), - 'ETag': (ETag.generate, singleHeader), - 'Location': (unique, str, singleHeader), -# 'Proxy-Authenticate' - 'Retry-After': (generateRetryAfter, singleHeader), - 'Server': (unique, str, singleHeader), - 'Set-Cookie': (generateSetCookie,), - 'Set-Cookie2': (generateSetCookie2,), - 'Vary': (set, generateList, singleHeader), - 'WWW-Authenticate': (generateWWWAuthenticate,) -} - -parser_entity_headers = { - 'Allow': (lambda str:tokenize(str, foldCase=False), filterTokens), - 'Content-Encoding': (tokenize, filterTokens), - 'Content-Language': (tokenize, filterTokens), - 'Content-Length': (last, int), - 'Content-Location': (last,), # TODO: URI object? - 'Content-MD5': (last, parseContentMD5), - 'Content-Range': (last, parseContentRange), - 'Content-Type': (lambda str:tokenize(str, foldCase=False), parseContentType), - 'Expires': (last, parseExpires), - 'Last-Modified': (last, parseDateTime), - } - -generator_entity_headers = { - 'Allow': (generateList, singleHeader), - 'Content-Encoding': (generateList, singleHeader), - 'Content-Language': (generateList, singleHeader), - 'Content-Length': (unique, str, singleHeader), - 'Content-Location': (unique, str, singleHeader), - 'Content-MD5': (base64.encodestring, lambda x: x.strip("\n"), singleHeader), - 'Content-Range': (generateContentRange, singleHeader), - 'Content-Type': (generateContentType, singleHeader), - 'Expires': (generateDateTime, singleHeader), - 'Last-Modified': (generateDateTime, singleHeader), - } - -DefaultHTTPHandler.updateParsers(parser_general_headers) -DefaultHTTPHandler.updateParsers(parser_request_headers) -DefaultHTTPHandler.updateParsers(parser_response_headers) -DefaultHTTPHandler.updateParsers(parser_entity_headers) - -DefaultHTTPHandler.updateGenerators(generator_general_headers) -DefaultHTTPHandler.updateGenerators(generator_request_headers) -DefaultHTTPHandler.updateGenerators(generator_response_headers) -DefaultHTTPHandler.updateGenerators(generator_entity_headers) - - -# casemappingify(DefaultHTTPParsers) -# casemappingify(DefaultHTTPGenerators) - -# lowerify(DefaultHTTPParsers) -# lowerify(DefaultHTTPGenerators)
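For reference, the module removed above exposed a small header-handling API (Headers, MimeType, DefaultHTTPHandler) that converted lazily between raw header strings and parsed representations. The sketch below exercises that API using only names defined in the deleted code; it is illustrative, not part of the changeset, and the import path simply reflects the file location as it existed before this removal.

    # Sketch only: drives the Headers/MimeType API defined in the deleted module.
    # "web.http_headers" is the pre-removal module path; adjust if vendored elsewhere.
    from web.http_headers import Headers, MimeType

    # Raw (wire-format) values go in as lists of strings, keyed by header name;
    # names are case-insensitive internally.
    hdrs = Headers(rawHeaders={'Content-Type': ['text/html; charset=UTF-8']})

    # getHeader() returns the parsed representation; for Content-Type that is a MimeType.
    mt = hdrs.getHeader('content-type')
    assert (mt.mediaType, mt.mediaSubtype) == ('text', 'html')
    assert mt.params == {'charset': 'UTF-8'}

    # setHeader() stores a parsed value; getRawHeaders() regenerates the wire form on demand.
    hdrs.setHeader('content-type', MimeType('application', 'json'))
    assert hdrs.getRawHeaders('content-type') == ['application/json']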