web/http_headers.py
changeset 5155 1dea6e0fdfc1
child 7855 54283a5b7afc
equal deleted inserted replaced
5125:eaec839ad3fe 5155:1dea6e0fdfc1
       
     1 # This file has been extracted from the abandoned TwistedWeb2 project
       
     2 # http://twistedmatrix.com/trac/wiki/TwistedWeb2
       
     3 
       
     4 
       
     5 from __future__ import generators
       
     6 
       
     7 import types, time
       
     8 from calendar import timegm
       
     9 import base64
       
    10 import re
       
    11 
       
    12 def dashCapitalize(s):
       
    13     ''' Capitalize a string, making sure to treat - as a word seperator '''
       
    14     return '-'.join([ x.capitalize() for x in s.split('-')])
       
    15 
       
    16 # datetime parsing and formatting
       
    17 weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
       
    18 weekdayname_lower = [name.lower() for name in weekdayname]
       
    19 monthname = [None,
       
    20              'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
       
    21              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
       
    22 monthname_lower = [name and name.lower() for name in monthname]
       
    23 
       
    24 # HTTP Header parsing API
       
    25 
       
    26 header_case_mapping = {}
       
    27 
       
    28 def casemappingify(d):
       
    29     global header_case_mapping
       
    30     newd = dict([(key.lower(),key) for key in d.keys()])
       
    31     header_case_mapping.update(newd)
       
    32 
       
    33 def lowerify(d):
       
    34     return dict([(key.lower(),value) for key,value in d.items()])
       
    35 
       
    36 
       
    37 class HeaderHandler(object):
       
    38     """HeaderHandler manages header generating and parsing functions.
       
    39     """
       
    40     HTTPParsers = {}
       
    41     HTTPGenerators = {}
       
    42 
       
    43     def __init__(self, parsers=None, generators=None):
       
    44         """
       
    45         @param parsers: A map of header names to parsing functions.
       
    46         @type parsers: L{dict}
       
    47 
       
    48         @param generators: A map of header names to generating functions.
       
    49         @type generators: L{dict}
       
    50         """
       
    51 
       
    52         if parsers:
       
    53             self.HTTPParsers.update(parsers)
       
    54         if generators:
       
    55             self.HTTPGenerators.update(generators)
       
    56 
       
    57     def parse(self, name, header):
       
    58         """
       
    59         Parse the given header based on its given name.
       
    60 
       
    61         @param name: The header name to parse.
       
    62         @type name: C{str}
       
    63 
       
    64         @param header: A list of unparsed headers.
       
    65         @type header: C{list} of C{str}
       
    66 
       
    67         @return: The return value is the parsed header representation,
       
    68             it is dependent on the header.  See the HTTP Headers document.
       
    69         """
       
    70         parser = self.HTTPParsers.get(name, None)
       
    71         if parser is None:
       
    72             raise ValueError("No header parser for header '%s', either add one or use getHeaderRaw." % (name,))
       
    73 
       
    74         try:
       
    75             for p in parser:
       
    76                 # print "Parsing %s: %s(%s)" % (name, repr(p), repr(h))
       
    77                 header = p(header)
       
    78                 # if isinstance(h, types.GeneratorType):
       
    79                 #     h=list(h)
       
    80         except ValueError,v:
       
    81             # print v
       
    82             header=None
       
    83 
       
    84         return header
       
    85 
       
    86     def generate(self, name, header):
       
    87         """
       
    88         Generate the given header based on its given name.
       
    89 
       
    90         @param name: The header name to generate.
       
    91         @type name: C{str}
       
    92 
       
    93         @param header: A parsed header, such as the output of
       
    94             L{HeaderHandler}.parse.
       
    95 
       
    96         @return: C{list} of C{str} each representing a generated HTTP header.
       
    97         """
       
    98         generator = self.HTTPGenerators.get(name, None)
       
    99 
       
   100         if generator is None:
       
   101             # print self.generators
       
   102             raise ValueError("No header generator for header '%s', either add one or use setHeaderRaw." % (name,))
       
   103 
       
   104         for g in generator:
       
   105             header = g(header)
       
   106 
       
   107         #self._raw_headers[name] = h
       
   108         return header
       
   109 
       
   110     def updateParsers(self, parsers):
       
   111         """Update en masse the parser maps.
       
   112 
       
   113         @param parsers: Map of header names to parser chains.
       
   114         @type parsers: C{dict}
       
   115         """
       
   116         casemappingify(parsers)
       
   117         self.HTTPParsers.update(lowerify(parsers))
       
   118 
       
   119     def addParser(self, name, value):
       
   120         """Add an individual parser chain for the given header.
       
   121 
       
   122         @param name: Name of the header to add
       
   123         @type name: C{str}
       
   124 
       
   125         @param value: The parser chain
       
   126         @type value: C{str}
       
   127         """
       
   128         self.updateParsers({name: value})
       
   129 
       
   130     def updateGenerators(self, generators):
       
   131         """Update en masse the generator maps.
       
   132 
       
   133         @param parsers: Map of header names to generator chains.
       
   134         @type parsers: C{dict}
       
   135         """
       
   136         casemappingify(generators)
       
   137         self.HTTPGenerators.update(lowerify(generators))
       
   138 
       
   139     def addGenerators(self, name, value):
       
   140         """Add an individual generator chain for the given header.
       
   141 
       
   142         @param name: Name of the header to add
       
   143         @type name: C{str}
       
   144 
       
   145         @param value: The generator chain
       
   146         @type value: C{str}
       
   147         """
       
   148         self.updateGenerators({name: value})
       
   149 
       
   150     def update(self, parsers, generators):
       
   151         """Conveniently update parsers and generators all at once.
       
   152         """
       
   153         self.updateParsers(parsers)
       
   154         self.updateGenerators(generators)
       
   155 
       
   156 
       
# Module-level handler instance, created with no parsers or generators of
# its own.  MimeType.fromString parses 'content-type' values through it.
DefaultHTTPHandler = HeaderHandler()
       
   158 
       
   159 
       
   160 ## HTTP DateTime parser
       
   161 def parseDateTime(dateString):
       
   162     """Convert an HTTP date string (one of three formats) to seconds since epoch."""
       
   163     parts = dateString.split()
       
   164 
       
   165     if not parts[0][0:3].lower() in weekdayname_lower:
       
   166         # Weekday is stupid. Might have been omitted.
       
   167         try:
       
   168             return parseDateTime("Sun, "+dateString)
       
   169         except ValueError:
       
   170             # Guess not.
       
   171             pass
       
   172 
       
   173     partlen = len(parts)
       
   174     if (partlen == 5 or partlen == 6) and parts[1].isdigit():
       
   175         # 1st date format: Sun, 06 Nov 1994 08:49:37 GMT
       
   176         # (Note: "GMT" is literal, not a variable timezone)
       
   177         # (also handles without "GMT")
       
   178         # This is the normal format
       
   179         day = parts[1]
       
   180         month = parts[2]
       
   181         year = parts[3]
       
   182         time = parts[4]
       
   183     elif (partlen == 3 or partlen == 4) and parts[1].find('-') != -1:
       
   184         # 2nd date format: Sunday, 06-Nov-94 08:49:37 GMT
       
   185         # (Note: "GMT" is literal, not a variable timezone)
       
   186         # (also handles without without "GMT")
       
   187         # Two digit year, yucko.
       
   188         day, month, year = parts[1].split('-')
       
   189         time = parts[2]
       
   190         year=int(year)
       
   191         if year < 69:
       
   192             year = year + 2000
       
   193         elif year < 100:
       
   194             year = year + 1900
       
   195     elif len(parts) == 5:
       
   196         # 3rd date format: Sun Nov  6 08:49:37 1994
       
   197         # ANSI C asctime() format.
       
   198         day = parts[2]
       
   199         month = parts[1]
       
   200         year = parts[4]
       
   201         time = parts[3]
       
   202     else:
       
   203         raise ValueError("Unknown datetime format %r" % dateString)
       
   204 
       
   205     day = int(day)
       
   206     month = int(monthname_lower.index(month.lower()))
       
   207     year = int(year)
       
   208     hour, min, sec = map(int, time.split(':'))
       
   209     return int(timegm((year, month, day, hour, min, sec)))
       
   210 
       
   211 
       
   212 ##### HTTP tokenizer
       
   213 class Token(str):
       
   214     __slots__=[]
       
   215     tokens = {}
       
   216     def __new__(self, char):
       
   217         token = Token.tokens.get(char)
       
   218         if token is None:
       
   219             Token.tokens[char] = token = str.__new__(self, char)
       
   220         return token
       
   221 
       
   222     def __repr__(self):
       
   223         return "Token(%s)" % str.__repr__(self)
       
   224 
       
   225 
       
   226 http_tokens = " \t\"()<>@,;:\\/[]?={}"
       
   227 http_ctls = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f"
       
   228 
       
   229 def tokenize(header, foldCase=True):
       
   230     """Tokenize a string according to normal HTTP header parsing rules.
       
   231 
       
   232     In particular:
       
   233      - Whitespace is irrelevant and eaten next to special separator tokens.
       
   234        Its existance (but not amount) is important between character strings.
       
   235      - Quoted string support including embedded backslashes.
       
   236      - Case is insignificant (and thus lowercased), except in quoted strings.
       
   237         (unless foldCase=False)
       
   238      - Multiple headers are concatenated with ','
       
   239 
       
   240     NOTE: not all headers can be parsed with this function.
       
   241 
       
   242     Takes a raw header value (list of strings), and
       
   243     Returns a generator of strings and Token class instances.
       
   244     """
       
   245     tokens=http_tokens
       
   246     ctls=http_ctls
       
   247 
       
   248     string = ",".join(header)
       
   249     list = []
       
   250     start = 0
       
   251     cur = 0
       
   252     quoted = False
       
   253     qpair = False
       
   254     inSpaces = -1
       
   255     qstring = None
       
   256 
       
   257     for x in string:
       
   258         if quoted:
       
   259             if qpair:
       
   260                 qpair = False
       
   261                 qstring = qstring+string[start:cur-1]+x
       
   262                 start = cur+1
       
   263             elif x == '\\':
       
   264                 qpair = True
       
   265             elif x == '"':
       
   266                 quoted = False
       
   267                 yield qstring+string[start:cur]
       
   268                 qstring=None
       
   269                 start = cur+1
       
   270         elif x in tokens:
       
   271             if start != cur:
       
   272                 if foldCase:
       
   273                     yield string[start:cur].lower()
       
   274                 else:
       
   275                     yield string[start:cur]
       
   276 
       
   277             start = cur+1
       
   278             if x == '"':
       
   279                 quoted = True
       
   280                 qstring = ""
       
   281                 inSpaces = False
       
   282             elif x in " \t":
       
   283                 if inSpaces is False:
       
   284                     inSpaces = True
       
   285             else:
       
   286                 inSpaces = -1
       
   287                 yield Token(x)
       
   288         elif x in ctls:
       
   289             raise ValueError("Invalid control character: %d in header" % ord(x))
       
   290         else:
       
   291             if inSpaces is True:
       
   292                 yield Token(' ')
       
   293                 inSpaces = False
       
   294 
       
   295             inSpaces = False
       
   296         cur = cur+1
       
   297 
       
   298     if qpair:
       
   299         raise ValueError, "Missing character after '\\'"
       
   300     if quoted:
       
   301         raise ValueError, "Missing end quote"
       
   302 
       
   303     if start != cur:
       
   304         if foldCase:
       
   305             yield string[start:cur].lower()
       
   306         else:
       
   307             yield string[start:cur]
       
   308 
       
   309 def split(seq, delim):
       
   310     """The same as str.split but works on arbitrary sequences.
       
   311     Too bad it's not builtin to python!"""
       
   312 
       
   313     cur = []
       
   314     for item in seq:
       
   315         if item == delim:
       
   316             yield cur
       
   317             cur = []
       
   318         else:
       
   319             cur.append(item)
       
   320     yield cur
       
   321 
       
   322 # def find(seq, *args):
       
   323 #     """The same as seq.index but returns -1 if not found, instead
       
   324 #     Too bad it's not builtin to python!"""
       
   325 #     try:
       
   326 #         return seq.index(value, *args)
       
   327 #     except ValueError:
       
   328 #         return -1
       
   329 
       
   330 
       
   331 def filterTokens(seq):
       
   332     """Filter out instances of Token, leaving only a list of strings.
       
   333 
       
   334     Used instead of a more specific parsing method (e.g. splitting on commas)
       
   335     when only strings are expected, so as to be a little lenient.
       
   336 
       
   337     Apache does it this way and has some comments about broken clients which
       
   338     forget commas (?), so I'm doing it the same way. It shouldn't
       
   339     hurt anything, in any case.
       
   340     """
       
   341 
       
   342     l=[]
       
   343     for x in seq:
       
   344         if not isinstance(x, Token):
       
   345             l.append(x)
       
   346     return l
       
   347 
       
   348 ##### parser utilities:
       
   349 def checkSingleToken(tokens):
       
   350     if len(tokens) != 1:
       
   351         raise ValueError, "Expected single token, not %s." % (tokens,)
       
   352     return tokens[0]
       
   353 
       
   354 def parseKeyValue(val):
       
   355     if len(val) == 1:
       
   356         return val[0],None
       
   357     elif len(val) == 3 and val[1] == Token('='):
       
   358         return val[0],val[2]
       
   359     raise ValueError, "Expected key or key=value, but got %s." % (val,)
       
   360 
       
   361 def parseArgs(field):
       
   362     args=split(field, Token(';'))
       
   363     val = args.next()
       
   364     args = [parseKeyValue(arg) for arg in args]
       
   365     return val,args
       
   366 
       
   367 def listParser(fun):
       
   368     """Return a function which applies 'fun' to every element in the
       
   369     comma-separated list"""
       
   370     def listParserHelper(tokens):
       
   371         fields = split(tokens, Token(','))
       
   372         for field in fields:
       
   373             if len(field) != 0:
       
   374                 yield fun(field)
       
   375 
       
   376     return listParserHelper
       
   377 
       
   378 def last(seq):
       
   379     """Return seq[-1]"""
       
   380 
       
   381     return seq[-1]
       
   382 
       
   383 ##### Generation utilities
       
   384 def quoteString(s):
       
   385     return '"%s"' % s.replace('\\', '\\\\').replace('"', '\\"')
       
   386 
       
   387 def listGenerator(fun):
       
   388     """Return a function which applies 'fun' to every element in
       
   389     the given list, then joins the result with generateList"""
       
   390     def listGeneratorHelper(l):
       
   391         return generateList([fun(e) for e in l])
       
   392 
       
   393     return listGeneratorHelper
       
   394 
       
   395 def generateList(seq):
       
   396     return ", ".join(seq)
       
   397 
       
   398 def singleHeader(item):
       
   399     return [item]
       
   400 
       
   401 def generateKeyValues(kvs):
       
   402     l = []
       
   403     # print kvs
       
   404     for k,v in kvs:
       
   405         if v is None:
       
   406             l.append('%s' % k)
       
   407         else:
       
   408             l.append('%s=%s' % (k,v))
       
   409     return ";".join(l)
       
   410 
       
   411 
       
   412 class MimeType(object):
       
   413     def fromString(klass, mimeTypeString):
       
   414         """Generate a MimeType object from the given string.
       
   415 
       
   416         @param mimeTypeString: The mimetype to parse
       
   417 
       
   418         @return: L{MimeType}
       
   419         """
       
   420         return DefaultHTTPHandler.parse('content-type', [mimeTypeString])
       
   421 
       
   422     fromString = classmethod(fromString)
       
   423 
       
   424     def __init__(self, mediaType, mediaSubtype, params={}, **kwargs):
       
   425         """
       
   426         @type mediaType: C{str}
       
   427 
       
   428         @type mediaSubtype: C{str}
       
   429 
       
   430         @type params: C{dict}
       
   431         """
       
   432         self.mediaType = mediaType
       
   433         self.mediaSubtype = mediaSubtype
       
   434         self.params = dict(params)
       
   435 
       
   436         if kwargs:
       
   437             self.params.update(kwargs)
       
   438 
       
   439     def __eq__(self, other):
       
   440         if not isinstance(other, MimeType): return NotImplemented
       
   441         return (self.mediaType == other.mediaType and
       
   442                 self.mediaSubtype == other.mediaSubtype and
       
   443                 self.params == other.params)
       
   444 
       
   445     def __ne__(self, other):
       
   446         return not self.__eq__(other)
       
   447 
       
   448     def __repr__(self):
       
   449         return "MimeType(%r, %r, %r)" % (self.mediaType, self.mediaSubtype, self.params)
       
   450 
       
   451     def __hash__(self):
       
   452         return hash(self.mediaType)^hash(self.mediaSubtype)^hash(tuple(self.params.iteritems()))
       
   453 
       
   454 ##### Specific header parsers.
       
   455 def parseAccept(field):
       
   456     type,args = parseArgs(field)
       
   457 
       
   458     if len(type) != 3 or type[1] != Token('/'):
       
   459         raise ValueError, "MIME Type "+str(type)+" invalid."
       
   460 
       
   461     # okay, this spec is screwy. A 'q' parameter is used as the separator
       
   462     # between MIME parameters and (as yet undefined) additional HTTP
       
   463     # parameters.
       
   464 
       
   465     num = 0
       
   466     for arg in args:
       
   467         if arg[0] == 'q':
       
   468             mimeparams=tuple(args[0:num])
       
   469             params=args[num:]
       
   470             break
       
   471         num = num + 1
       
   472     else:
       
   473         mimeparams=tuple(args)
       
   474         params=[]
       
   475 
       
   476     # Default values for parameters:
       
   477     qval = 1.0
       
   478 
       
   479     # Parse accept parameters:
       
   480     for param in params:
       
   481         if param[0] =='q':
       
   482             qval = float(param[1])
       
   483         else:
       
   484             # Warn? ignored parameter.
       
   485             pass
       
   486 
       
   487     ret = MimeType(type[0],type[2],mimeparams),qval
       
   488     return ret
       
   489 
       
   490 def parseAcceptQvalue(field):
       
   491     type,args=parseArgs(field)
       
   492 
       
   493     type = checkSingleToken(type)
       
   494 
       
   495     qvalue = 1.0 # Default qvalue is 1
       
   496     for arg in args:
       
   497         if arg[0] == 'q':
       
   498             qvalue = float(arg[1])
       
   499     return type,qvalue
       
   500 
       
   501 def addDefaultCharset(charsets):
       
   502     if charsets.get('*') is None and charsets.get('iso-8859-1') is None:
       
   503         charsets['iso-8859-1'] = 1.0
       
   504     return charsets
       
   505 
       
   506 def addDefaultEncoding(encodings):
       
   507     if encodings.get('*') is None and encodings.get('identity') is None:
       
   508         # RFC doesn't specify a default value for identity, only that it
       
   509         # "is acceptable" if not mentioned. Thus, give it a very low qvalue.
       
   510         encodings['identity'] = .0001
       
   511     return encodings
       
   512 
       
   513 
       
   514 def parseContentType(header):
       
   515     # Case folding is disabled for this header, because of use of
       
   516     # Content-Type: multipart/form-data; boundary=CaSeFuLsTuFf
       
   517     # So, we need to explicitly .lower() the type/subtype and arg keys.
       
   518 
       
   519     type,args = parseArgs(header)
       
   520 
       
   521     if len(type) != 3 or type[1] != Token('/'):
       
   522         raise ValueError, "MIME Type "+str(type)+" invalid."
       
   523 
       
   524     args = [(kv[0].lower(), kv[1]) for kv in args]
       
   525 
       
   526     return MimeType(type[0].lower(), type[2].lower(), tuple(args))
       
   527 
       
   528 def parseContentMD5(header):
       
   529     try:
       
   530         return base64.decodestring(header)
       
   531     except Exception,e:
       
   532         raise ValueError(e)
       
   533 
       
   534 def parseContentRange(header):
       
   535     """Parse a content-range header into (kind, start, end, realLength).
       
   536 
       
   537     realLength might be None if real length is not known ('*').
       
   538     start and end might be None if start,end unspecified (for response code 416)
       
   539     """
       
   540     kind, other = header.strip().split()
       
   541     if kind.lower() != "bytes":
       
   542         raise ValueError("a range of type %r is not supported")
       
   543     startend, realLength = other.split("/")
       
   544     if startend.strip() == '*':
       
   545         start,end=None,None
       
   546     else:
       
   547         start, end = map(int, startend.split("-"))
       
   548     if realLength == "*":
       
   549         realLength = None
       
   550     else:
       
   551         realLength = int(realLength)
       
   552     return (kind, start, end, realLength)
       
   553 
       
   554 def parseExpect(field):
       
   555     type,args=parseArgs(field)
       
   556 
       
   557     type=parseKeyValue(type)
       
   558     return (type[0], (lambda *args:args)(type[1], *args))
       
   559 
       
   560 def parseExpires(header):
       
   561     # """HTTP/1.1 clients and caches MUST treat other invalid date formats,
       
   562     #    especially including the value 0, as in the past (i.e., "already expired")."""
       
   563 
       
   564     try:
       
   565         return parseDateTime(header)
       
   566     except ValueError:
       
   567         return 0
       
   568 
       
   569 def parseIfModifiedSince(header):
       
   570     # Ancient versions of netscape and *current* versions of MSIE send
       
   571     #   If-Modified-Since: Thu, 05 Aug 2004 12:57:27 GMT; length=123
       
   572     # which is blantantly RFC-violating and not documented anywhere
       
   573     # except bug-trackers for web frameworks.
       
   574 
       
   575     # So, we'll just strip off everything after a ';'.
       
   576     return parseDateTime(header.split(';', 1)[0])
       
   577 
       
   578 def parseIfRange(headers):
       
   579     try:
       
   580         return ETag.parse(tokenize(headers))
       
   581     except ValueError:
       
   582         return parseDateTime(last(headers))
       
   583 
       
   584 def parseRange(range):
       
   585     range = list(range)
       
   586     if len(range) < 3 or range[1] != Token('='):
       
   587         raise ValueError("Invalid range header format: %s" %(range,))
       
   588 
       
   589     type=range[0]
       
   590     if type != 'bytes':
       
   591         raise ValueError("Unknown range unit: %s." % (type,))
       
   592     rangeset=split(range[2:], Token(','))
       
   593     ranges = []
       
   594 
       
   595     for byterangespec in rangeset:
       
   596         if len(byterangespec) != 1:
       
   597             raise ValueError("Invalid range header format: %s" % (range,))
       
   598         start,end=byterangespec[0].split('-')
       
   599 
       
   600         if not start and not end:
       
   601             raise ValueError("Invalid range header format: %s" % (range,))
       
   602 
       
   603         if start:
       
   604             start = int(start)
       
   605         else:
       
   606             start = None
       
   607 
       
   608         if end:
       
   609             end = int(end)
       
   610         else:
       
   611             end = None
       
   612 
       
   613         if start and end and start > end:
       
   614             raise ValueError("Invalid range header, start > end: %s" % (range,))
       
   615         ranges.append((start,end))
       
   616     return type,ranges
       
   617 
       
   618 def parseRetryAfter(header):
       
   619     try:
       
   620         # delta seconds
       
   621         return time.time() + int(header)
       
   622     except ValueError:
       
   623         # or datetime
       
   624         return parseDateTime(header)
       
   625 
       
   626 # WWW-Authenticate and Authorization
       
   627 
       
   628 def parseWWWAuthenticate(tokenized):
       
   629     headers = []
       
   630 
       
   631     tokenList = list(tokenized)
       
   632 
       
   633     while tokenList:
       
   634         scheme = tokenList.pop(0)
       
   635         challenge = {}
       
   636         last = None
       
   637         kvChallenge = False
       
   638 
       
   639         while tokenList:
       
   640             token = tokenList.pop(0)
       
   641             if token == Token('='):
       
   642                 kvChallenge = True
       
   643                 challenge[last] = tokenList.pop(0)
       
   644                 last = None
       
   645 
       
   646             elif token == Token(','):
       
   647                 if kvChallenge:
       
   648                     if len(tokenList) > 1 and tokenList[1] != Token('='):
       
   649                         break
       
   650 
       
   651                 else:
       
   652                     break
       
   653 
       
   654             else:
       
   655                 last = token
       
   656 
       
   657         if last and scheme and not challenge and not kvChallenge:
       
   658             challenge = last
       
   659             last = None
       
   660 
       
   661         headers.append((scheme, challenge))
       
   662 
       
   663     if last and last not in (Token('='), Token(',')):
       
   664         if headers[-1] == (scheme, challenge):
       
   665             scheme = last
       
   666             challenge = {}
       
   667             headers.append((scheme, challenge))
       
   668 
       
   669     return headers
       
   670 
       
   671 def parseAuthorization(header):
       
   672     scheme, rest = header.split(' ', 1)
       
   673     # this header isn't tokenized because it may eat characters
       
   674     # in the unquoted base64 encoded credentials
       
   675     return scheme.lower(), rest
       
   676 
       
   677 #### Header generators
       
   678 def generateAccept(accept):
       
   679     mimeType,q = accept
       
   680 
       
   681     out="%s/%s"%(mimeType.mediaType, mimeType.mediaSubtype)
       
   682     if mimeType.params:
       
   683         out+=';'+generateKeyValues(mimeType.params.iteritems())
       
   684 
       
   685     if q != 1.0:
       
   686         out+=(';q=%.3f' % (q,)).rstrip('0').rstrip('.')
       
   687 
       
   688     return out
       
   689 
       
   690 def removeDefaultEncoding(seq):
       
   691     for item in seq:
       
   692         if item[0] != 'identity' or item[1] != .0001:
       
   693             yield item
       
   694 
       
   695 def generateAcceptQvalue(keyvalue):
       
   696     if keyvalue[1] == 1.0:
       
   697         return "%s" % keyvalue[0:1]
       
   698     else:
       
   699         return ("%s;q=%.3f" % keyvalue).rstrip('0').rstrip('.')
       
   700 
       
   701 def parseCacheControl(kv):
       
   702     k, v = parseKeyValue(kv)
       
   703     if k == 'max-age' or k == 'min-fresh' or k == 's-maxage':
       
   704         # Required integer argument
       
   705         if v is None:
       
   706             v = 0
       
   707         else:
       
   708             v = int(v)
       
   709     elif k == 'max-stale':
       
   710         # Optional integer argument
       
   711         if v is not None:
       
   712             v = int(v)
       
   713     elif k == 'private' or k == 'no-cache':
       
   714         # Optional list argument
       
   715         if v is not None:
       
   716             v = [field.strip().lower() for field in v.split(',')]
       
   717     return k, v
       
   718 
       
   719 def generateCacheControl((k, v)):
       
   720     if v is None:
       
   721         return str(k)
       
   722     else:
       
   723         if k == 'no-cache' or k == 'private':
       
   724             # quoted list of values
       
   725             v = quoteString(generateList(
       
   726                 [header_case_mapping.get(name) or dashCapitalize(name) for name in v]))
       
   727         return '%s=%s' % (k,v)
       
   728 
       
   729 def generateContentRange(tup):
       
   730     """tup is (type, start, end, len)
       
   731     len can be None.
       
   732     """
       
   733     type, start, end, len = tup
       
   734     if len == None:
       
   735         len = '*'
       
   736     else:
       
   737         len = int(len)
       
   738     if start == None and end == None:
       
   739         startend = '*'
       
   740     else:
       
   741         startend = '%d-%d' % (start, end)
       
   742 
       
   743     return '%s %s/%s' % (type, startend, len)
       
   744 
       
   745 def generateDateTime(secSinceEpoch):
       
   746     """Convert seconds since epoch to HTTP datetime string."""
       
   747     year, month, day, hh, mm, ss, wd, y, z = time.gmtime(secSinceEpoch)
       
   748     s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
       
   749         weekdayname[wd],
       
   750         day, monthname[month], year,
       
   751         hh, mm, ss)
       
   752     return s
       
   753 
       
   754 def generateExpect(item):
       
   755     if item[1][0] is None:
       
   756         out = '%s' % (item[0],)
       
   757     else:
       
   758         out = '%s=%s' % (item[0], item[1][0])
       
   759     if len(item[1]) > 1:
       
   760         out += ';'+generateKeyValues(item[1][1:])
       
   761     return out
       
   762 
       
   763 def generateRange(range):
       
   764     def noneOr(s):
       
   765         if s is None:
       
   766             return ''
       
   767         return s
       
   768 
       
   769     type,ranges=range
       
   770 
       
   771     if type != 'bytes':
       
   772         raise ValueError("Unknown range unit: "+type+".")
       
   773 
       
   774     return (type+'='+
       
   775             ','.join(['%s-%s' % (noneOr(startend[0]), noneOr(startend[1]))
       
   776                       for startend in ranges]))
       
   777 
       
   778 def generateRetryAfter(when):
       
   779     # always generate delta seconds format
       
   780     return str(int(when - time.time()))
       
   781 
       
   782 def generateContentType(mimeType):
       
   783     out="%s/%s"%(mimeType.mediaType, mimeType.mediaSubtype)
       
   784     if mimeType.params:
       
   785         out+=';'+generateKeyValues(mimeType.params.iteritems())
       
   786     return out
       
   787 
       
   788 def generateIfRange(dateOrETag):
       
   789     if isinstance(dateOrETag, ETag):
       
   790         return dateOrETag.generate()
       
   791     else:
       
   792         return generateDateTime(dateOrETag)
       
   793 
       
   794 # WWW-Authenticate and Authorization
       
   795 
       
   796 def generateWWWAuthenticate(headers):
       
   797     _generated = []
       
   798     for seq in headers:
       
   799         scheme, challenge = seq[0], seq[1]
       
   800 
       
   801         # If we're going to parse out to something other than a dict
       
   802         # we need to be able to generate from something other than a dict
       
   803 
       
   804         try:
       
   805             l = []
       
   806             for k,v in dict(challenge).iteritems():
       
   807                 l.append("%s=%s" % (k, quoteString(v)))
       
   808 
       
   809             _generated.append("%s %s" % (scheme, ", ".join(l)))
       
   810         except ValueError:
       
   811             _generated.append("%s %s" % (scheme, challenge))
       
   812 
       
   813     return _generated
       
   814 
       
   815 def generateAuthorization(seq):
       
   816     return [' '.join(seq)]
       
   817 
       
   818 
       
   819 ####
       
   820 class ETag(object):
       
   821     def __init__(self, tag, weak=False):
       
   822         self.tag = str(tag)
       
   823         self.weak = weak
       
   824 
       
   825     def match(self, other, strongCompare):
       
   826         # Sec 13.3.
       
   827         # The strong comparison function: in order to be considered equal, both
       
   828         #   validators MUST be identical in every way, and both MUST NOT be weak.
       
   829         #
       
   830         # The weak comparison function: in order to be considered equal, both
       
   831         #   validators MUST be identical in every way, but either or both of
       
   832         #   them MAY be tagged as "weak" without affecting the result.
       
   833 
       
   834         if not isinstance(other, ETag) or other.tag != self.tag:
       
   835             return False
       
   836 
       
   837         if strongCompare and (other.weak or self.weak):
       
   838             return False
       
   839         return True
       
   840 
       
   841     def __eq__(self, other):
       
   842         return isinstance(other, ETag) and other.tag == self.tag and other.weak == self.weak
       
   843 
       
   844     def __ne__(self, other):
       
   845         return not self.__eq__(other)
       
   846 
       
   847     def __repr__(self):
       
   848         return "Etag(%r, weak=%r)" % (self.tag, self.weak)
       
   849 
       
   850     def parse(tokens):
       
   851         tokens=tuple(tokens)
       
   852         if len(tokens) == 1 and not isinstance(tokens[0], Token):
       
   853             return ETag(tokens[0])
       
   854 
       
   855         if(len(tokens) == 3 and tokens[0] == "w"
       
   856            and tokens[1] == Token('/')):
       
   857             return ETag(tokens[2], weak=True)
       
   858 
       
   859         raise ValueError("Invalid ETag.")
       
   860 
       
   861     parse=staticmethod(parse)
       
   862 
       
   863     def generate(self):
       
   864         if self.weak:
       
   865             return 'W/'+quoteString(self.tag)
       
   866         else:
       
   867             return quoteString(self.tag)
       
   868 
       
   869 def parseStarOrETag(tokens):
       
   870     tokens=tuple(tokens)
       
   871     if tokens == ('*',):
       
   872         return '*'
       
   873     else:
       
   874         return ETag.parse(tokens)
       
   875 
       
   876 def generateStarOrETag(etag):
       
   877     if etag=='*':
       
   878         return etag
       
   879     else:
       
   880         return etag.generate()
       
   881 
       
   882 #### Cookies. Blech!
       
   883 class Cookie(object):
       
   884     # __slots__ = ['name', 'value', 'path', 'domain', 'ports', 'expires', 'discard', 'secure', 'comment', 'commenturl', 'version']
       
   885 
       
   886     def __init__(self, name, value, path=None, domain=None, ports=None, expires=None, discard=False, secure=False, comment=None, commenturl=None, version=0):
       
   887         self.name=name
       
   888         self.value=value
       
   889         self.path=path
       
   890         self.domain=domain
       
   891         self.ports=ports
       
   892         self.expires=expires
       
   893         self.discard=discard
       
   894         self.secure=secure
       
   895         self.comment=comment
       
   896         self.commenturl=commenturl
       
   897         self.version=version
       
   898 
       
   899     def __repr__(self):
       
   900         s="Cookie(%r=%r" % (self.name, self.value)
       
   901         if self.path is not None: s+=", path=%r" % (self.path,)
       
   902         if self.domain is not None: s+=", domain=%r" % (self.domain,)
       
   903         if self.ports is not None: s+=", ports=%r" % (self.ports,)
       
   904         if self.expires is not None: s+=", expires=%r" % (self.expires,)
       
   905         if self.secure is not False: s+=", secure=%r" % (self.secure,)
       
   906         if self.comment is not None: s+=", comment=%r" % (self.comment,)
       
   907         if self.commenturl is not None: s+=", commenturl=%r" % (self.commenturl,)
       
   908         if self.version != 0: s+=", version=%r" % (self.version,)
       
   909         s+=")"
       
   910         return s
       
   911 
       
   912     def __eq__(self, other):
       
   913         return (isinstance(other, Cookie) and
       
   914                 other.path == self.path and
       
   915                 other.domain == self.domain and
       
   916                 other.ports == self.ports and
       
   917                 other.expires == self.expires and
       
   918                 other.secure == self.secure and
       
   919                 other.comment == self.comment and
       
   920                 other.commenturl == self.commenturl and
       
   921                 other.version == self.version)
       
   922 
       
   923     def __ne__(self, other):
       
   924         return not self.__eq__(other)
       
   925 
       
   926 
       
   927 def parseCookie(headers):
       
   928     """Bleargh, the cookie spec sucks.
       
   929     This surely needs interoperability testing.
       
   930     There are two specs that are supported:
       
   931     Version 0) http://wp.netscape.com/newsref/std/cookie_spec.html
       
   932     Version 1) http://www.faqs.org/rfcs/rfc2965.html
       
   933     """
       
   934 
       
   935     cookies = []
       
   936     # There can't really be multiple cookie headers according to RFC, because
       
   937     # if multiple headers are allowed, they must be joinable with ",".
       
   938     # Neither new RFC2965 cookies nor old netscape cookies are.
       
   939 
       
   940     header = ';'.join(headers)
       
   941     if header[0:8].lower() == "$version":
       
   942         # RFC2965 cookie
       
   943         h=tokenize([header], foldCase=False)
       
   944         r_cookies = split(h, Token(','))
       
   945         for r_cookie in r_cookies:
       
   946             last_cookie = None
       
   947             rr_cookies = split(r_cookie, Token(';'))
       
   948             for cookie in rr_cookies:
       
   949                 nameval = tuple(split(cookie, Token('=')))
       
   950                 if len(nameval) == 2:
       
   951                     (name,), (value,) = nameval
       
   952                 else:
       
   953                     (name,), = nameval
       
   954                     value = None
       
   955 
       
   956                 name=name.lower()
       
   957                 if name == '$version':
       
   958                     continue
       
   959                 if name[0] == '$':
       
   960                     if last_cookie is not None:
       
   961                         if name == '$path':
       
   962                             last_cookie.path=value
       
   963                         elif name == '$domain':
       
   964                             last_cookie.domain=value
       
   965                         elif name == '$port':
       
   966                             if value is None:
       
   967                                 last_cookie.ports = ()
       
   968                             else:
       
   969                                 last_cookie.ports=tuple([int(s) for s in value.split(',')])
       
   970                 else:
       
   971                     last_cookie = Cookie(name, value, version=1)
       
   972                     cookies.append(last_cookie)
       
   973     else:
       
   974         # Oldstyle cookies don't do quoted strings or anything sensible.
       
   975         # All characters are valid for names except ';' and '=', and all
       
   976         # characters are valid for values except ';'. Spaces are stripped,
       
   977         # however.
       
   978         r_cookies = header.split(';')
       
   979         for r_cookie in r_cookies:
       
   980             name,value = r_cookie.split('=', 1)
       
   981             name=name.strip(' \t')
       
   982             value=value.strip(' \t')
       
   983 
       
   984             cookies.append(Cookie(name, value))
       
   985 
       
   986     return cookies
       
   987 
       
# Pattern for a cookie name that can be sent as-is in a V1 Cookie header:
# the whole string must consist of characters outside http_tokens and
# http_ctls (both defined elsewhere in this module).  Meant to be used with
# re.match, which anchors at the start; the trailing '$' anchors the end.
# Note that the empty string matches.
cookie_validname = "[^"+re.escape(http_tokens+http_ctls)+"]*$"
cookie_validname_re = re.compile(cookie_validname)
# A valid value is either a valid name as above, or a double-quoted string
# whose content is made of non-quote characters and backslash-quote pairs.
cookie_validvalue = cookie_validname+'|"([^"]|\\\\")*"$'
cookie_validvalue_re = re.compile(cookie_validvalue)
       
   992 
       
   993 def generateCookie(cookies):
       
   994     # There's a fundamental problem with the two cookie specifications.
       
   995     # They both use the "Cookie" header, and the RFC Cookie header only allows
       
   996     # one version to be specified. Thus, when you have a collection of V0 and
       
   997     # V1 cookies, you have to either send them all as V0 or send them all as
       
   998     # V1.
       
   999 
       
  1000     # I choose to send them all as V1.
       
  1001 
       
  1002     # You might think converting a V0 cookie to a V1 cookie would be lossless,
       
  1003     # but you'd be wrong. If you do the conversion, and a V0 parser tries to
       
  1004     # read the cookie, it will see a modified form of the cookie, in cases
       
  1005     # where quotes must be added to conform to proper V1 syntax.
       
  1006     # (as a real example: "Cookie: cartcontents=oid:94680,qty:1,auto:0,esp:y")
       
  1007 
       
  1008     # However, that is what we will do, anyways. It has a high probability of
       
  1009     # breaking applications that only handle oldstyle cookies, where some other
       
  1010     # application set a newstyle cookie that is applicable over for site
       
  1011     # (or host), AND where the oldstyle cookie uses a value which is invalid
       
  1012     # syntax in a newstyle cookie.
       
  1013 
       
  1014     # Also, the cookie name *cannot* be quoted in V1, so some cookies just
       
  1015     # cannot be converted at all. (e.g. "Cookie: phpAds_capAd[32]=2"). These
       
  1016     # are just dicarded during conversion.
       
  1017 
       
  1018     # As this is an unsolvable problem, I will pretend I can just say
       
  1019     # OH WELL, don't do that, or else upgrade your old applications to have
       
  1020     # newstyle cookie parsers.
       
  1021 
       
  1022     # I will note offhandedly that there are *many* sites which send V0 cookies
       
  1023     # that are not valid V1 cookie syntax. About 20% for my cookies file.
       
  1024     # However, they do not generally mix them with V1 cookies, so this isn't
       
  1025     # an issue, at least right now. I have not tested to see how many of those
       
  1026     # webapps support RFC2965 V1 cookies. I suspect not many.
       
  1027 
       
  1028     max_version = max([cookie.version for cookie in cookies])
       
  1029 
       
  1030     if max_version == 0:
       
  1031         # no quoting or anything.
       
  1032         return ';'.join(["%s=%s" % (cookie.name, cookie.value) for cookie in cookies])
       
  1033     else:
       
  1034         str_cookies = ['$Version="1"']
       
  1035         for cookie in cookies:
       
  1036             if cookie.version == 0:
       
  1037                 # Version 0 cookie: we make sure the name and value are valid
       
  1038                 # V1 syntax.
       
  1039 
       
  1040                 # If they are, we use them as is. This means in *most* cases,
       
  1041                 # the cookie will look literally the same on output as it did
       
  1042                 # on input.
       
  1043                 # If it isn't a valid name, ignore the cookie.
       
  1044                 # If it isn't a valid value, quote it and hope for the best on
       
  1045                 # the other side.
       
  1046 
       
  1047                 if cookie_validname_re.match(cookie.name) is None:
       
  1048                     continue
       
  1049 
       
  1050                 value=cookie.value
       
  1051                 if cookie_validvalue_re.match(cookie.value) is None:
       
  1052                     value = quoteString(value)
       
  1053 
       
  1054                 str_cookies.append("%s=%s" % (cookie.name, value))
       
  1055             else:
       
  1056                 # V1 cookie, nice and easy
       
  1057                 str_cookies.append("%s=%s" % (cookie.name, quoteString(cookie.value)))
       
  1058 
       
  1059             if cookie.path:
       
  1060                 str_cookies.append("$Path=%s" % quoteString(cookie.path))
       
  1061             if cookie.domain:
       
  1062                 str_cookies.append("$Domain=%s" % quoteString(cookie.domain))
       
  1063             if cookie.ports is not None:
       
  1064                 if len(cookie.ports) == 0:
       
  1065                     str_cookies.append("$Port")
       
  1066                 else:
       
  1067                     str_cookies.append("$Port=%s" % quoteString(",".join([str(x) for x in cookie.ports])))
       
  1068         return ';'.join(str_cookies)
       
  1069 
       
  1070 def parseSetCookie(headers):
       
  1071     setCookies = []
       
  1072     for header in headers:
       
  1073         try:
       
  1074             parts = header.split(';')
       
  1075             l = []
       
  1076 
       
  1077             for part in parts:
       
  1078                 namevalue = part.split('=',1)
       
  1079                 if len(namevalue) == 1:
       
  1080                     name=namevalue[0]
       
  1081                     value=None
       
  1082                 else:
       
  1083                     name,value=namevalue
       
  1084                     value=value.strip(' \t')
       
  1085 
       
  1086                 name=name.strip(' \t')
       
  1087 
       
  1088                 l.append((name, value))
       
  1089 
       
  1090             setCookies.append(makeCookieFromList(l, True))
       
  1091         except ValueError:
       
  1092             # If we can't parse one Set-Cookie, ignore it,
       
  1093             # but not the rest of Set-Cookies.
       
  1094             pass
       
  1095     return setCookies
       
  1096 
       
  1097 def parseSetCookie2(toks):
       
  1098     outCookies = []
       
  1099     for cookie in [[parseKeyValue(x) for x in split(y, Token(';'))]
       
  1100                    for y in split(toks, Token(','))]:
       
  1101         try:
       
  1102             outCookies.append(makeCookieFromList(cookie, False))
       
  1103         except ValueError:
       
  1104             # Again, if we can't handle one cookie -- ignore it.
       
  1105             pass
       
  1106     return outCookies
       
  1107 
       
  1108 def makeCookieFromList(tup, netscapeFormat):
       
  1109     name, value = tup[0]
       
  1110     if name is None or value is None:
       
  1111         raise ValueError("Cookie has missing name or value")
       
  1112     if name.startswith("$"):
       
  1113         raise ValueError("Invalid cookie name: %r, starts with '$'." % name)
       
  1114     cookie = Cookie(name, value)
       
  1115     hadMaxAge = False
       
  1116 
       
  1117     for name,value in tup[1:]:
       
  1118         name = name.lower()
       
  1119 
       
  1120         if value is None:
       
  1121             if name in ("discard", "secure"):
       
  1122                 # Boolean attrs
       
  1123                 value = True
       
  1124             elif name != "port":
       
  1125                 # Can be either boolean or explicit
       
  1126                 continue
       
  1127 
       
  1128         if name in ("comment", "commenturl", "discard", "domain", "path", "secure"):
       
  1129             # simple cases
       
  1130             setattr(cookie, name, value)
       
  1131         elif name == "expires" and not hadMaxAge:
       
  1132             if netscapeFormat and value[0] == '"' and value[-1] == '"':
       
  1133                 value = value[1:-1]
       
  1134             cookie.expires = parseDateTime(value)
       
  1135         elif name == "max-age":
       
  1136             hadMaxAge = True
       
  1137             cookie.expires = int(value) + time.time()
       
  1138         elif name == "port":
       
  1139             if value is None:
       
  1140                 cookie.ports = ()
       
  1141             else:
       
  1142                 if netscapeFormat and value[0] == '"' and value[-1] == '"':
       
  1143                     value = value[1:-1]
       
  1144                 cookie.ports = tuple([int(s) for s in value.split(',')])
       
  1145         elif name == "version":
       
  1146             cookie.version = int(value)
       
  1147 
       
  1148     return cookie
       
  1149 
       
  1150 
       
  1151 def generateSetCookie(cookies):
       
  1152     setCookies = []
       
  1153     for cookie in cookies:
       
  1154         out = ["%s=%s" % (cookie.name, cookie.value)]
       
  1155         if cookie.expires:
       
  1156             out.append("expires=%s" % generateDateTime(cookie.expires))
       
  1157         if cookie.path:
       
  1158             out.append("path=%s" % cookie.path)
       
  1159         if cookie.domain:
       
  1160             out.append("domain=%s" % cookie.domain)
       
  1161         if cookie.secure:
       
  1162             out.append("secure")
       
  1163 
       
  1164         setCookies.append('; '.join(out))
       
  1165     return setCookies
       
  1166 
       
  1167 def generateSetCookie2(cookies):
       
  1168     setCookies = []
       
  1169     for cookie in cookies:
       
  1170         out = ["%s=%s" % (cookie.name, quoteString(cookie.value))]
       
  1171         if cookie.comment:
       
  1172             out.append("Comment=%s" % quoteString(cookie.comment))
       
  1173         if cookie.commenturl:
       
  1174             out.append("CommentURL=%s" % quoteString(cookie.commenturl))
       
  1175         if cookie.discard:
       
  1176             out.append("Discard")
       
  1177         if cookie.domain:
       
  1178             out.append("Domain=%s" % quoteString(cookie.domain))
       
  1179         if cookie.expires:
       
  1180             out.append("Max-Age=%s" % (cookie.expires - time.time()))
       
  1181         if cookie.path:
       
  1182             out.append("Path=%s" % quoteString(cookie.path))
       
  1183         if cookie.ports is not None:
       
  1184             if len(cookie.ports) == 0:
       
  1185                 out.append("Port")
       
  1186             else:
       
  1187                 out.append("Port=%s" % quoteString(",".join([str(x) for x in cookie.ports])))
       
  1188         if cookie.secure:
       
  1189             out.append("Secure")
       
  1190         out.append('Version="1"')
       
  1191         setCookies.append('; '.join(out))
       
  1192     return setCookies
       
  1193 
       
  1194 def parseDepth(depth):
       
  1195     if depth not in ("0", "1", "infinity"):
       
  1196         raise ValueError("Invalid depth header value: %s" % (depth,))
       
  1197     return depth
       
  1198 
       
  1199 def parseOverWrite(overwrite):
       
  1200     if overwrite == "F":
       
  1201         return False
       
  1202     elif overwrite == "T":
       
  1203         return True
       
  1204     raise ValueError("Invalid overwrite header value: %s" % (overwrite,))
       
  1205 
       
  1206 def generateOverWrite(overwrite):
       
  1207     if overwrite:
       
  1208         return "T"
       
  1209     else:
       
  1210         return "F"
       
  1211 
       
  1212 ##### Random stuff that looks useful.
       
  1213 # def sortMimeQuality(s):
       
  1214 #     def sorter(item1, item2):
       
  1215 #         if item1[0] == '*':
       
  1216 #             if item2[0] == '*':
       
  1217 #                 return 0
       
  1218 
       
  1219 
       
  1220 # def sortQuality(s):
       
  1221 #     def sorter(item1, item2):
       
  1222 #         if item1[1] < item2[1]:
       
  1223 #             return -1
       
  1224 #         if item1[1] < item2[1]:
       
  1225 #             return 1
       
  1226 #         if item1[0] == item2[0]:
       
  1227 #             return 0
       
  1228 
       
  1229 
       
  1230 # def getMimeQuality(mimeType, accepts):
       
  1231 #     type,args = parseArgs(mimeType)
       
  1232 #     type=type.split(Token('/'))
       
  1233 #     if len(type) != 2:
       
  1234 #         raise ValueError, "MIME Type "+s+" invalid."
       
  1235 
       
  1236 #     for accept in accepts:
       
  1237 #         accept,acceptQual=accept
       
  1238 #         acceptType=accept[0:1]
       
  1239 #         acceptArgs=accept[2]
       
  1240 
       
  1241 #         if ((acceptType == type or acceptType == (type[0],'*') or acceptType==('*','*')) and
       
  1242 #             (args == acceptArgs or len(acceptArgs) == 0)):
       
  1243 #             return acceptQual
       
  1244 
       
  1245 # def getQuality(type, accepts):
       
  1246 #     qual = accepts.get(type)
       
  1247 #     if qual is not None:
       
  1248 #         return qual
       
  1249 
       
  1250 #     return accepts.get('*')
       
  1251 
       
  1252 # Headers object
       
class __RecalcNeeded(object):
    """Type of the _RecalcNeeded marker; only its repr is customized."""
    def __repr__(self):
        return "<RecalcNeeded>"

# Single shared marker instance.  Code in this module tests for it with
# "is" / "is not", so no other instance should be created.
_RecalcNeeded = __RecalcNeeded()
       
  1258 
       
  1259 class Headers(object):
       
  1260     """This class stores the HTTP headers as both a parsed representation and
       
  1261     the raw string representation. It converts between the two on demand."""
       
  1262 
       
  1263     def __init__(self, headers=None, rawHeaders=None, handler=DefaultHTTPHandler):
       
  1264         self._raw_headers = {}
       
  1265         self._headers = {}
       
  1266         self.handler = handler
       
  1267         if headers is not None:
       
  1268             for key, value in headers.iteritems():
       
  1269                 self.setHeader(key, value)
       
  1270         if rawHeaders is not None:
       
  1271             for key, value in rawHeaders.iteritems():
       
  1272                 self.setRawHeaders(key, value)
       
  1273 
       
  1274     def _setRawHeaders(self, headers):
       
  1275         self._raw_headers = headers
       
  1276         self._headers = {}
       
  1277 
       
  1278     def _toParsed(self, name):
       
  1279         r = self._raw_headers.get(name, None)
       
  1280         h = self.handler.parse(name, r)
       
  1281         if h is not None:
       
  1282             self._headers[name] = h
       
  1283         return h
       
  1284 
       
  1285     def _toRaw(self, name):
       
  1286         h = self._headers.get(name, None)
       
  1287         r = self.handler.generate(name, h)
       
  1288         if r is not None:
       
  1289             self._raw_headers[name] = r
       
  1290         return r
       
  1291 
       
  1292     def hasHeader(self, name):
       
  1293         """Does a header with the given name exist?"""
       
  1294         name=name.lower()
       
  1295         return self._raw_headers.has_key(name)
       
  1296 
       
  1297     def getRawHeaders(self, name, default=None):
       
  1298         """Returns a list of headers matching the given name as the raw string given."""
       
  1299 
       
  1300         name=name.lower()
       
  1301         raw_header = self._raw_headers.get(name, default)
       
  1302         if raw_header is not _RecalcNeeded:
       
  1303             return raw_header
       
  1304 
       
  1305         return self._toRaw(name)
       
  1306 
       
  1307     def getHeader(self, name, default=None):
       
  1308         """Ret9urns the parsed representation of the given header.
       
  1309         The exact form of the return value depends on the header in question.
       
  1310 
       
  1311         If no parser for the header exists, raise ValueError.
       
  1312 
       
  1313         If the header doesn't exist, return default (or None if not specified)
       
  1314         """
       
  1315         name=name.lower()
       
  1316         parsed = self._headers.get(name, default)
       
  1317         if parsed is not _RecalcNeeded:
       
  1318             return parsed
       
  1319         return self._toParsed(name)
       
  1320 
       
  1321     def setRawHeaders(self, name, value):
       
  1322         """Sets the raw representation of the given header.
       
  1323         Value should be a list of strings, each being one header of the
       
  1324         given name.
       
  1325         """
       
  1326         name=name.lower()
       
  1327         self._raw_headers[name] = value
       
  1328         self._headers[name] = _RecalcNeeded
       
  1329 
       
  1330     def setHeader(self, name, value):
       
  1331         """Sets the parsed representation of the given header.
       
  1332         Value should be a list of objects whose exact form depends
       
  1333         on the header in question.
       
  1334         """
       
  1335         name=name.lower()
       
  1336         self._raw_headers[name] = _RecalcNeeded
       
  1337         self._headers[name] = value
       
  1338 
       
  1339     def addRawHeader(self, name, value):
       
  1340         """
       
  1341         Add a raw value to a header that may or may not already exist.
       
  1342         If it exists, add it as a separate header to output; do not
       
  1343         replace anything.
       
  1344         """
       
  1345         name=name.lower()
       
  1346         raw_header = self._raw_headers.get(name)
       
  1347         if raw_header is None:
       
  1348             # No header yet
       
  1349             raw_header = []
       
  1350             self._raw_headers[name] = raw_header
       
  1351         elif raw_header is _RecalcNeeded:
       
  1352             raw_header = self._toRaw(name)
       
  1353 
       
  1354         raw_header.append(value)
       
  1355         self._headers[name] = _RecalcNeeded
       
  1356 
       
  1357     def removeHeader(self, name):
       
  1358         """Removes the header named."""
       
  1359 
       
  1360         name=name.lower()
       
  1361         if self._raw_headers.has_key(name):
       
  1362             del self._raw_headers[name]
       
  1363             del self._headers[name]
       
  1364 
       
  1365     def __repr__(self):
       
  1366         return '<Headers: Raw: %s Parsed: %s>'% (self._raw_headers, self._headers)
       
  1367 
       
  1368     def canonicalNameCaps(self, name):
       
  1369         """Return the name with the canonical capitalization, if known,
       
  1370         otherwise, Caps-After-Dashes"""
       
  1371         return header_case_mapping.get(name) or dashCapitalize(name)
       
  1372 
       
  1373     def getAllRawHeaders(self):
       
  1374         """Return an iterator of key,value pairs of all headers
       
  1375         contained in this object, as strings. The keys are capitalized
       
  1376         in canonical capitalization."""
       
  1377         for k,v in self._raw_headers.iteritems():
       
  1378             if v is _RecalcNeeded:
       
  1379                 v = self._toRaw(k)
       
  1380             yield self.canonicalNameCaps(k), v
       
  1381 
       
  1382     def makeImmutable(self):
       
  1383         """Make this header set immutable. All mutating operations will
       
  1384         raise an exception."""
       
  1385         self.setHeader = self.setRawHeaders = self.removeHeader = self._mutateRaise
       
  1386 
       
  1387     def _mutateRaise(self, *args):
       
  1388         raise AttributeError("This header object is immutable as the headers have already been sent.")
       
  1389 
       
  1390 
       
"""The following dicts each map a header name to the list of operations
   to perform on it. The first operation should generally be 'tokenize' if
   the header can be parsed according to the normal tokenization rules. If
   it cannot, generally the first thing you want to do is take only the
   last instance of the header (in case it was sent multiple times, which
   is strictly an error, but we're nice).
   """
       
  1398 
       
  1399 iteritems = lambda x: x.iteritems()
       
  1400 
       
  1401 
       
  1402 parser_general_headers = {
       
  1403     'Cache-Control':(tokenize, listParser(parseCacheControl), dict),
       
  1404     'Connection':(tokenize,filterTokens),
       
  1405     'Date':(last,parseDateTime),
       
  1406 #    'Pragma':tokenize
       
  1407 #    'Trailer':tokenize
       
  1408     'Transfer-Encoding':(tokenize,filterTokens),
       
  1409 #    'Upgrade':tokenize
       
  1410 #    'Via':tokenize,stripComment
       
  1411 #    'Warning':tokenize
       
  1412 }
       
  1413 
       
  1414 generator_general_headers = {
       
  1415     'Cache-Control':(iteritems, listGenerator(generateCacheControl), singleHeader),
       
  1416     'Connection':(generateList,singleHeader),
       
  1417     'Date':(generateDateTime,singleHeader),
       
  1418 #    'Pragma':
       
  1419 #    'Trailer':
       
  1420     'Transfer-Encoding':(generateList,singleHeader),
       
  1421 #    'Upgrade':
       
  1422 #    'Via':
       
  1423 #    'Warning':
       
  1424 }
       
  1425 
       
  1426 parser_request_headers = {
       
  1427     'Accept': (tokenize, listParser(parseAccept), dict),
       
  1428     'Accept-Charset': (tokenize, listParser(parseAcceptQvalue), dict, addDefaultCharset),
       
  1429     'Accept-Encoding':(tokenize, listParser(parseAcceptQvalue), dict, addDefaultEncoding),
       
  1430     'Accept-Language':(tokenize, listParser(parseAcceptQvalue), dict),
       
  1431     'Authorization': (last, parseAuthorization),
       
  1432     'Cookie':(parseCookie,),
       
  1433     'Expect':(tokenize, listParser(parseExpect), dict),
       
  1434     'From':(last,),
       
  1435     'Host':(last,),
       
  1436     'If-Match':(tokenize, listParser(parseStarOrETag), list),
       
  1437     'If-Modified-Since':(last, parseIfModifiedSince),
       
  1438     'If-None-Match':(tokenize, listParser(parseStarOrETag), list),
       
  1439     'If-Range':(parseIfRange,),
       
  1440     'If-Unmodified-Since':(last,parseDateTime),
       
  1441     'Max-Forwards':(last,int),
       
  1442 #    'Proxy-Authorization':str, # what is "credentials"
       
  1443     'Range':(tokenize, parseRange),
       
  1444     'Referer':(last,str), # TODO: URI object?
       
  1445     'TE':(tokenize, listParser(parseAcceptQvalue), dict),
       
  1446     'User-Agent':(last,str),
       
  1447 }
       
  1448 
       
  1449 generator_request_headers = {
       
  1450     'Accept': (iteritems,listGenerator(generateAccept),singleHeader),
       
  1451     'Accept-Charset': (iteritems, listGenerator(generateAcceptQvalue),singleHeader),
       
  1452     'Accept-Encoding': (iteritems, removeDefaultEncoding, listGenerator(generateAcceptQvalue),singleHeader),
       
  1453     'Accept-Language': (iteritems, listGenerator(generateAcceptQvalue),singleHeader),
       
  1454     'Authorization': (generateAuthorization,), # what is "credentials"
       
  1455     'Cookie':(generateCookie,singleHeader),
       
  1456     'Expect':(iteritems, listGenerator(generateExpect), singleHeader),
       
  1457     'From':(str,singleHeader),
       
  1458     'Host':(str,singleHeader),
       
  1459     'If-Match':(listGenerator(generateStarOrETag), singleHeader),
       
  1460     'If-Modified-Since':(generateDateTime,singleHeader),
       
  1461     'If-None-Match':(listGenerator(generateStarOrETag), singleHeader),
       
  1462     'If-Range':(generateIfRange, singleHeader),
       
  1463     'If-Unmodified-Since':(generateDateTime,singleHeader),
       
  1464     'Max-Forwards':(str, singleHeader),
       
  1465 #    'Proxy-Authorization':str, # what is "credentials"
       
  1466     'Range':(generateRange,singleHeader),
       
  1467     'Referer':(str,singleHeader),
       
  1468     'TE': (iteritems, listGenerator(generateAcceptQvalue),singleHeader),
       
  1469     'User-Agent':(str,singleHeader),
       
  1470 }
       
  1471 
       
  1472 parser_response_headers = {
       
  1473     'Accept-Ranges':(tokenize, filterTokens),
       
  1474     'Age':(last,int),
       
  1475     'ETag':(tokenize, ETag.parse),
       
  1476     'Location':(last,), # TODO: URI object?
       
  1477 #    'Proxy-Authenticate'
       
  1478     'Retry-After':(last, parseRetryAfter),
       
  1479     'Server':(last,),
       
  1480     'Set-Cookie':(parseSetCookie,),
       
  1481     'Set-Cookie2':(tokenize, parseSetCookie2),
       
  1482     'Vary':(tokenize, filterTokens),
       
  1483     'WWW-Authenticate': (lambda h: tokenize(h, foldCase=False),
       
  1484                          parseWWWAuthenticate,)
       
  1485 }
       
  1486 
       
  1487 generator_response_headers = {
       
  1488     'Accept-Ranges':(generateList, singleHeader),
       
  1489     'Age':(str, singleHeader),
       
  1490     'ETag':(ETag.generate, singleHeader),
       
  1491     'Location':(str, singleHeader),
       
  1492 #    'Proxy-Authenticate'
       
  1493     'Retry-After':(generateRetryAfter, singleHeader),
       
  1494     'Server':(str, singleHeader),
       
  1495     'Set-Cookie':(generateSetCookie,),
       
  1496     'Set-Cookie2':(generateSetCookie2,),
       
  1497     'Vary':(generateList, singleHeader),
       
  1498     'WWW-Authenticate':(generateWWWAuthenticate,)
       
  1499 }
       
  1500 
       
  1501 parser_entity_headers = {
       
  1502     'Allow':(lambda str:tokenize(str, foldCase=False), filterTokens),
       
  1503     'Content-Encoding':(tokenize, filterTokens),
       
  1504     'Content-Language':(tokenize, filterTokens),
       
  1505     'Content-Length':(last, int),
       
  1506     'Content-Location':(last,), # TODO: URI object?
       
  1507     'Content-MD5':(last, parseContentMD5),
       
  1508     'Content-Range':(last, parseContentRange),
       
  1509     'Content-Type':(lambda str:tokenize(str, foldCase=False), parseContentType),
       
  1510     'Expires':(last, parseExpires),
       
  1511     'Last-Modified':(last, parseDateTime),
       
  1512     }
       
  1513 
       
  1514 generator_entity_headers = {
       
  1515     'Allow':(generateList, singleHeader),
       
  1516     'Content-Encoding':(generateList, singleHeader),
       
  1517     'Content-Language':(generateList, singleHeader),
       
  1518     'Content-Length':(str, singleHeader),
       
  1519     'Content-Location':(str, singleHeader),
       
  1520     'Content-MD5':(base64.encodestring, lambda x: x.strip("\n"), singleHeader),
       
  1521     'Content-Range':(generateContentRange, singleHeader),
       
  1522     'Content-Type':(generateContentType, singleHeader),
       
  1523     'Expires':(generateDateTime, singleHeader),
       
  1524     'Last-Modified':(generateDateTime, singleHeader),
       
  1525     }
       
  1526 
       
  1527 DefaultHTTPHandler.updateParsers(parser_general_headers)
       
  1528 DefaultHTTPHandler.updateParsers(parser_request_headers)
       
  1529 DefaultHTTPHandler.updateParsers(parser_response_headers)
       
  1530 DefaultHTTPHandler.updateParsers(parser_entity_headers)
       
  1531 
       
  1532 DefaultHTTPHandler.updateGenerators(generator_general_headers)
       
  1533 DefaultHTTPHandler.updateGenerators(generator_request_headers)
       
  1534 DefaultHTTPHandler.updateGenerators(generator_response_headers)
       
  1535 DefaultHTTPHandler.updateGenerators(generator_entity_headers)
       
  1536 
       
  1537 
       
  1538 # casemappingify(DefaultHTTPParsers)
       
  1539 # casemappingify(DefaultHTTPGenerators)
       
  1540 
       
  1541 # lowerify(DefaultHTTPParsers)
       
  1542 # lowerify(DefaultHTTPGenerators)