|
1 # This file has been extracted from the abandoned TwistedWeb2 project |
|
2 # http://twistedmatrix.com/trac/wiki/TwistedWeb2 |
|
3 |
|
4 |
|
5 from __future__ import generators |
|
6 |
|
7 import types, time |
|
8 from calendar import timegm |
|
9 import base64 |
|
10 import re |
|
11 |
|
def dashCapitalize(s):
    """Capitalize each '-'-delimited word of s, treating '-' as a word separator."""
    return '-'.join(map(str.capitalize, s.split('-')))
|
15 |
|
16 # datetime parsing and formatting |
|
# RFC 1123 weekday/month name tables used by the HTTP date parser/generator.
weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
# Lowercased variants for case-insensitive matching while parsing.
weekdayname_lower = [name.lower() for name in weekdayname]
# Index 0 is None so that monthname.index(...) yields 1-based month numbers.
monthname = [None,
             'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
             'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# 'name and name.lower()' keeps the None placeholder at index 0.
monthname_lower = [name and name.lower() for name in monthname]
|
23 |
|
24 # HTTP Header parsing API |
|
25 |
|
# Maps lowercased header names to their canonical capitalization.
header_case_mapping = {}

def casemappingify(d):
    """Record the canonical (cased) spelling of every header name in d."""
    global header_case_mapping
    for key in d:
        header_case_mapping[key.lower()] = key
|
32 |
|
def lowerify(d):
    """Return a copy of mapping d with every key lowercased."""
    return dict((key.lower(), value) for key, value in d.items())
|
35 |
|
36 |
|
class HeaderHandler(object):
    """HeaderHandler manages header generating and parsing functions.
    """
    # NOTE(review): these registries are class attributes, shared by every
    # instance; __init__ and the update methods below mutate them in place,
    # so all HeaderHandler instances see the same parser/generator maps.
    HTTPParsers = {}
    HTTPGenerators = {}

    def __init__(self, parsers=None, generators=None):
        """
        @param parsers: A map of header names to parsing functions.
        @type parsers: L{dict}

        @param generators: A map of header names to generating functions.
        @type generators: L{dict}
        """

        if parsers:
            self.HTTPParsers.update(parsers)
        if generators:
            self.HTTPGenerators.update(generators)

    def parse(self, name, header):
        """
        Parse the given header based on its given name.

        @param name: The header name to parse.
        @type name: C{str}

        @param header: A list of unparsed headers.
        @type header: C{list} of C{str}

        @return: The return value is the parsed header representation,
        it is dependent on the header.  See the HTTP Headers document.
        """
        parser = self.HTTPParsers.get(name, None)
        if parser is None:
            raise ValueError("No header parser for header '%s', either add one or use getHeaderRaw." % (name,))

        # A parser entry is a chain of callables; each transforms the output
        # of the previous one.
        try:
            for p in parser:
                # print "Parsing %s: %s(%s)" % (name, repr(p), repr(h))
                header = p(header)
                # if isinstance(h, types.GeneratorType):
                #     h=list(h)
        except ValueError,v:
            # A malformed header is reported as None rather than raising.
            # print v
            header=None

        return header

    def generate(self, name, header):
        """
        Generate the given header based on its given name.

        @param name: The header name to generate.
        @type name: C{str}

        @param header: A parsed header, such as the output of
        L{HeaderHandler}.parse.

        @return: C{list} of C{str} each representing a generated HTTP header.
        """
        generator = self.HTTPGenerators.get(name, None)

        if generator is None:
            # print self.generators
            raise ValueError("No header generator for header '%s', either add one or use setHeaderRaw." % (name,))

        # A generator entry is likewise a chain of callables applied in order.
        for g in generator:
            header = g(header)

        #self._raw_headers[name] = h
        return header

    def updateParsers(self, parsers):
        """Update en masse the parser maps.

        @param parsers: Map of header names to parser chains.
        @type parsers: C{dict}
        """
        # Remember each header's canonical capitalization, then register the
        # chains under lowercased names for case-insensitive lookup.
        casemappingify(parsers)
        self.HTTPParsers.update(lowerify(parsers))

    def addParser(self, name, value):
        """Add an individual parser chain for the given header.

        @param name: Name of the header to add
        @type name: C{str}

        @param value: The parser chain
        @type value: C{str}
        """
        self.updateParsers({name: value})

    def updateGenerators(self, generators):
        """Update en masse the generator maps.

        @param generators: Map of header names to generator chains.
        @type generators: C{dict}
        """
        casemappingify(generators)
        self.HTTPGenerators.update(lowerify(generators))

    def addGenerators(self, name, value):
        """Add an individual generator chain for the given header.

        @param name: Name of the header to add
        @type name: C{str}

        @param value: The generator chain
        @type value: C{str}
        """
        self.updateGenerators({name: value})

    def update(self, parsers, generators):
        """Conveniently update parsers and generators all at once.
        """
        self.updateParsers(parsers)
        self.updateGenerators(generators)
|
155 |
|
156 |
|
# Shared module-wide handler instance.  Because HeaderHandler's registries are
# class attributes, registrations made through this object are globally visible.
DefaultHTTPHandler = HeaderHandler()
|
158 |
|
159 |
|
160 ## HTTP DateTime parser |
|
def parseDateTime(dateString):
    """Convert an HTTP date string (one of three formats) to seconds since epoch."""
    parts = dateString.split()

    if parts[0][0:3].lower() not in weekdayname_lower:
        # The weekday prefix may simply have been omitted; retry with one.
        try:
            return parseDateTime("Sun, " + dateString)
        except ValueError:
            # Guess not.
            pass

    partlen = len(parts)
    if partlen in (5, 6) and parts[1].isdigit():
        # 1st date format: Sun, 06 Nov 1994 08:49:37 GMT
        # (Note: "GMT" is literal, not a variable timezone, and may be
        # absent.)  This is the normal format.
        dayStr = parts[1]
        monthStr = parts[2]
        yearStr = parts[3]
        clock = parts[4]
        yearNum = int(yearStr)
    elif partlen in (3, 4) and '-' in parts[1]:
        # 2nd date format: Sunday, 06-Nov-94 08:49:37 GMT
        # Two digit year, yucko.
        dayStr, monthStr, yearStr = parts[1].split('-')
        clock = parts[2]
        yearNum = int(yearStr)
        # Two-digit years pivot at 69: 00-68 -> 2000s, 69-99 -> 1900s.
        if yearNum < 69:
            yearNum = yearNum + 2000
        elif yearNum < 100:
            yearNum = yearNum + 1900
    elif partlen == 5:
        # 3rd date format: Sun Nov  6 08:49:37 1994 -- ANSI C asctime().
        dayStr = parts[2]
        monthStr = parts[1]
        yearStr = parts[4]
        clock = parts[3]
        yearNum = int(yearStr)
    else:
        raise ValueError("Unknown datetime format %r" % dateString)

    dayNum = int(dayStr)
    # .index() raises ValueError for an unknown month name, which callers
    # treat as "unparseable date".
    monthNum = monthname_lower.index(monthStr.lower())
    hour, minute, second = map(int, clock.split(':'))
    return int(timegm((yearNum, monthNum, dayNum, hour, minute, second)))
|
210 |
|
211 |
|
212 ##### HTTP tokenizer |
|
class Token(str):
    """An interned string subclass marking HTTP separator characters.

    Instances are cached per character, so identical tokens are identical
    objects.
    """
    __slots__ = []
    tokens = {}

    def __new__(cls, char):
        cached = Token.tokens.get(char)
        if cached is None:
            cached = str.__new__(cls, char)
            Token.tokens[char] = cached
        return cached

    def __repr__(self):
        return "Token(%s)" % str.__repr__(self)
|
224 |
|
225 |
|
# Separator characters from RFC 2616 section 2.2, plus space and tab.
http_tokens = " \t\"()<>@,;:\\/[]?={}"
# US-ASCII control characters (0x00-0x1F and DEL), invalid outside quoted strings.
http_ctls = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f"
|
228 |
|
def tokenize(header, foldCase=True):
    """Tokenize a string according to normal HTTP header parsing rules.

    In particular:
     - Whitespace is irrelevant and eaten next to special separator tokens.
       Its existance (but not amount) is important between character strings.
     - Quoted string support including embedded backslashes.
     - Case is insignificant (and thus lowercased), except in quoted strings.
        (unless foldCase=False)
     - Multiple headers are concatenated with ','

    NOTE: not all headers can be parsed with this function.

    Takes a raw header value (list of strings), and
    Returns a generator of strings and Token class instances.
    """
    tokens = http_tokens
    ctls = http_ctls

    # Multiple raw header lines are treated as one ','-joined value.
    string = ",".join(header)
    start = 0
    cur = 0
    quoted = False       # inside a quoted string?
    qpair = False        # previous char was a backslash inside quotes?
    inSpaces = -1        # -1: not after a word; False/True: space-run tracking
    qstring = None       # accumulated quoted-string content

    for x in string:
        if quoted:
            if qpair:
                qpair = False
                # Splice out the backslash, keep the escaped character.
                qstring = qstring+string[start:cur-1]+x
                start = cur+1
            elif x == '\\':
                qpair = True
            elif x == '"':
                quoted = False
                yield qstring+string[start:cur]
                qstring = None
                start = cur+1
        elif x in tokens:
            if start != cur:
                if foldCase:
                    yield string[start:cur].lower()
                else:
                    yield string[start:cur]

            start = cur+1
            if x == '"':
                quoted = True
                qstring = ""
                inSpaces = False
            elif x in " \t":
                if inSpaces is False:
                    inSpaces = True
            else:
                inSpaces = -1
                yield Token(x)
        elif x in ctls:
            raise ValueError("Invalid control character: %d in header" % ord(x))
        else:
            # Plain character: emit a single space token if a space run
            # separated two character strings.
            if inSpaces is True:
                yield Token(' ')
                inSpaces = False

            inSpaces = False
        cur = cur+1

    if qpair:
        raise ValueError("Missing character after '\\'")
    if quoted:
        raise ValueError("Missing end quote")

    if start != cur:
        if foldCase:
            yield string[start:cur].lower()
        else:
            yield string[start:cur]
|
308 |
|
def split(seq, delim):
    """The same as str.split but works on arbitrary sequences.
    Too bad it's not builtin to python!"""
    group = []
    for element in seq:
        if element == delim:
            yield group
            group = []
        else:
            group.append(element)
    yield group
|
321 |
|
322 # def find(seq, *args): |
|
323 # """The same as seq.index but returns -1 if not found, instead |
|
324 # Too bad it's not builtin to python!""" |
|
325 # try: |
|
326 # return seq.index(value, *args) |
|
327 # except ValueError: |
|
328 # return -1 |
|
329 |
|
330 |
|
def filterTokens(seq):
    """Filter out instances of Token, leaving only a list of strings.

    Used instead of a more specific parsing method (e.g. splitting on commas)
    when only strings are expected, so as to be a little lenient.

    Apache does it this way and has some comments about broken clients which
    forget commas (?), so I'm doing it the same way. It shouldn't
    hurt anything, in any case.
    """
    return [x for x in seq if not isinstance(x, Token)]
|
347 |
|
348 ##### parser utilities: |
|
def checkSingleToken(tokens):
    """Return the sole element of tokens; raise ValueError for any other length."""
    if len(tokens) != 1:
        raise ValueError("Expected single token, not %s." % (tokens,))
    return tokens[0]
|
353 |
|
def parseKeyValue(val):
    """Interpret a token run as 'key' or 'key=value'; return (key, value-or-None)."""
    if len(val) == 1:
        return val[0], None
    if len(val) == 3 and val[1] == Token('='):
        return val[0], val[2]
    raise ValueError("Expected key or key=value, but got %s." % (val,))
|
360 |
|
def parseArgs(field):
    """Split a tokenized field at ';' into (value-tokens, [(key, value), ...])."""
    args = split(field, Token(';'))
    # First ';'-group is the main value; the rest are key[=value] parameters.
    # next() builtin instead of the Python 2-only .next() method.
    val = next(args)
    args = [parseKeyValue(arg) for arg in args]
    return val, args
|
366 |
|
def listParser(fun):
    """Return a function which applies 'fun' to every element in the
    comma-separated list"""
    def listParserHelper(tokens):
        for field in split(tokens, Token(',')):
            if field:
                yield fun(field)

    return listParserHelper
|
377 |
|
def last(seq):
    """Return seq[-1]"""

    return seq[-1]
|
382 |
|
383 ##### Generation utilities |
|
def quoteString(s):
    """Wrap s in double quotes, backslash-escaping backslashes and quotes."""
    escaped = s.replace('\\', '\\\\').replace('"', '\\"')
    return '"' + escaped + '"'
|
386 |
|
def listGenerator(fun):
    """Return a function which applies 'fun' to every element in
    the given list, then joins the result with generateList"""
    def listGeneratorHelper(l):
        return generateList(map(fun, l))

    return listGeneratorHelper
|
394 |
|
def generateList(seq):
    """Join generated header fragments with ', ', the HTTP list separator."""
    return ", ".join(seq)
|
397 |
|
def singleHeader(item):
    """Wrap a generated header value in a one-element list (one header line)."""
    return [item]
|
400 |
|
def generateKeyValues(kvs):
    """Render (key, value) pairs as 'k=v' -- or bare 'k' when value is None -- joined by ';'."""
    return ";".join([
        '%s' % k if v is None else '%s=%s' % (k, v)
        for k, v in kvs])
|
410 |
|
411 |
|
class MimeType(object):
    """A parsed MIME media type: mediaType/mediaSubtype plus a parameter dict."""

    @classmethod
    def fromString(klass, mimeTypeString):
        """Generate a MimeType object from the given string.

        @param mimeTypeString: The mimetype to parse

        @return: L{MimeType}
        """
        return DefaultHTTPHandler.parse('content-type', [mimeTypeString])

    def __init__(self, mediaType, mediaSubtype, params=None, **kwargs):
        """
        @type mediaType: C{str}

        @type mediaSubtype: C{str}

        @param params: parameters, as a mapping or a sequence of (key, value)
            pairs.  (Previously this defaulted to a shared mutable C{{}};
            C{None} now means "no parameters" and is backward compatible.)
        @type params: C{dict}
        """
        self.mediaType = mediaType
        self.mediaSubtype = mediaSubtype
        # Copy so we never alias the caller's mapping.
        self.params = dict(params) if params is not None else {}

        if kwargs:
            self.params.update(kwargs)

    def __eq__(self, other):
        if not isinstance(other, MimeType): return NotImplemented
        return (self.mediaType == other.mediaType and
                self.mediaSubtype == other.mediaSubtype and
                self.params == other.params)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "MimeType(%r, %r, %r)" % (self.mediaType, self.mediaSubtype, self.params)

    def __hash__(self):
        # Bug fix: hashing tuple(iteritems()) depended on dict insertion
        # order, so equal MimeTypes could hash unequally (and iteritems is
        # Python 2 only).  frozenset is order-independent.
        return hash(self.mediaType) ^ hash(self.mediaSubtype) ^ hash(frozenset(self.params.items()))
|
453 |
|
454 ##### Specific header parsers. |
|
def parseAccept(field):
    """Parse one tokenized Accept field into a (MimeType, qvalue) pair."""
    mimeSpec, args = parseArgs(field)

    if len(mimeSpec) != 3 or mimeSpec[1] != Token('/'):
        raise ValueError("MIME Type "+str(mimeSpec)+" invalid.")

    # okay, this spec is screwy. A 'q' parameter is used as the separator
    # between MIME parameters and (as yet undefined) additional HTTP
    # parameters.
    for qIndex, arg in enumerate(args):
        if arg[0] == 'q':
            mimeparams = tuple(args[0:qIndex])
            params = args[qIndex:]
            break
    else:
        mimeparams = tuple(args)
        params = []

    # Default values for parameters:
    qval = 1.0

    # Parse accept parameters:
    for param in params:
        if param[0] == 'q':
            qval = float(param[1])
        else:
            # Warn? ignored parameter.
            pass

    return MimeType(mimeSpec[0], mimeSpec[2], mimeparams), qval
|
489 |
|
def parseAcceptQvalue(field):
    """Parse a tokenized 'item;q=value' field into (item, qvalue)."""
    item, args = parseArgs(field)

    item = checkSingleToken(item)

    qvalue = 1.0  # Default qvalue is 1
    for arg in args:
        if arg[0] == 'q':
            qvalue = float(arg[1])
    return item, qvalue
|
500 |
|
def addDefaultCharset(charsets):
    """Apply the implicit rule that iso-8859-1 is always acceptable unless
    the client mentioned it (or '*') explicitly.  Mutates and returns charsets."""
    unmentioned = (charsets.get('*') is None
                   and charsets.get('iso-8859-1') is None)
    if unmentioned:
        charsets['iso-8859-1'] = 1.0
    return charsets
|
505 |
|
def addDefaultEncoding(encodings):
    """Give 'identity' a tiny default qvalue when the client didn't mention it.

    The RFC doesn't specify a default value for identity, only that it
    "is acceptable" if not mentioned.  Thus, give it a very low qvalue.
    Mutates and returns encodings.
    """
    unmentioned = (encodings.get('*') is None
                   and encodings.get('identity') is None)
    if unmentioned:
        encodings['identity'] = .0001
    return encodings
|
512 |
|
513 |
|
def parseContentType(header):
    """Parse a tokenized Content-Type value into a MimeType.

    Case folding is disabled for this header because of uses like
    Content-Type: multipart/form-data; boundary=CaSeFuLsTuFf
    so the type/subtype and parameter *keys* are lowercased explicitly here,
    while parameter values keep their case.
    """
    mimeSpec, args = parseArgs(header)

    if len(mimeSpec) != 3 or mimeSpec[1] != Token('/'):
        raise ValueError("MIME Type "+str(mimeSpec)+" invalid.")

    args = [(key.lower(), value) for key, value in args]

    return MimeType(mimeSpec[0].lower(), mimeSpec[2].lower(), tuple(args))
|
527 |
|
def parseContentMD5(header):
    """Decode the base64 Content-MD5 header value into the raw digest.

    @raise ValueError: if the header is not valid base64.
    """
    try:
        # b64decode exists in both Python 2 and 3; base64.decodestring was
        # removed in Python 3.9.
        return base64.b64decode(header)
    except Exception as e:
        raise ValueError(e)
|
533 |
|
def parseContentRange(header):
    """Parse a content-range header into (kind, start, end, realLength).

    realLength might be None if real length is not known ('*').
    start and end might be None if start,end unspecified (for response code 416)

    @raise ValueError: if the header is malformed or uses a unit other than
        bytes.
    """
    kind, other = header.strip().split()
    if kind.lower() != "bytes":
        # Bug fix: the original never applied the % argument to this message.
        raise ValueError("a range of type %r is not supported" % (kind,))
    startend, realLength = other.split("/")
    if startend.strip() == '*':
        start, end = None, None
    else:
        start, end = map(int, startend.split("-"))
    if realLength == "*":
        realLength = None
    else:
        realLength = int(realLength)
    return (kind, start, end, realLength)
|
553 |
|
def parseExpect(field):
    """Parse a tokenized Expect field into (key, (value, extension-params...))."""
    expectation, args = parseArgs(field)

    key, value = parseKeyValue(expectation)
    # Same result as the original (lambda *args: args)(value, *args) trick.
    return (key, (value,) + tuple(args))
|
559 |
|
def parseExpires(header):
    """Parse an Expires header; unparseable values mean 'already expired'."""
    # """HTTP/1.1 clients and caches MUST treat other invalid date formats,
    # especially including the value 0, as in the past (i.e., "already expired")."""

    try:
        return parseDateTime(header)
    except ValueError:
        return 0
|
568 |
|
def parseIfModifiedSince(header):
    """Parse If-Modified-Since, tolerating the nonstandard '; length=123' suffix."""
    # Ancient versions of netscape and *current* versions of MSIE send
    # If-Modified-Since: Thu, 05 Aug 2004 12:57:27 GMT; length=123
    # which is blatantly RFC-violating and not documented anywhere
    # except bug-trackers for web frameworks.

    # So, we'll just strip off everything after a ';'.
    return parseDateTime(header.split(';', 1)[0])
|
577 |
|
def parseIfRange(headers):
    """Parse If-Range, which is either an entity tag or an HTTP date."""
    try:
        return ETag.parse(tokenize(headers))
    except ValueError:
        # Not an ETag; fall back to treating it as a date.
        return parseDateTime(last(headers))
|
583 |
|
def parseRange(range):
    """Parse a tokenized Range header ('bytes=0-499,500-') into
    (unit, [(start, end), ...]); start/end may be None for half-open specs."""
    range = list(range)
    if len(range) < 3 or range[1] != Token('='):
        raise ValueError("Invalid range header format: %s" %(range,))

    unit = range[0]
    if unit != 'bytes':
        raise ValueError("Unknown range unit: %s." % (unit,))

    ranges = []
    for byterangespec in split(range[2:], Token(',')):
        if len(byterangespec) != 1:
            raise ValueError("Invalid range header format: %s" % (range,))
        first, last = byterangespec[0].split('-')

        if not first and not last:
            raise ValueError("Invalid range header format: %s" % (range,))

        if first:
            first = int(first)
        else:
            first = None

        if last:
            last = int(last)
        else:
            last = None

        if first and last and first > last:
            raise ValueError("Invalid range header, start > end: %s" % (range,))
        ranges.append((first, last))
    return unit, ranges
|
617 |
|
def parseRetryAfter(header):
    """Parse Retry-After (delta seconds or an HTTP date) into an absolute
    seconds-since-epoch time."""
    try:
        # delta seconds
        return time.time() + int(header)
    except ValueError:
        # or datetime
        return parseDateTime(header)
|
625 |
|
626 # WWW-Authenticate and Authorization |
|
627 |
|
def parseWWWAuthenticate(tokenized):
    """Parse a tokenized WWW-Authenticate header into a list of
    (scheme, challenge) pairs, where challenge is usually a dict of
    auth-params (or, for schemes like Basic, a single bare token).
    """
    headers = []

    tokenList = list(tokenized)

    while tokenList:
        scheme = tokenList.pop(0)
        challenge = {}
        # NOTE(review): 'last' here shadows the module-level last() helper
        # within this function; it holds the most recent non-separator token.
        last = None
        kvChallenge = False

        while tokenList:
            token = tokenList.pop(0)
            if token == Token('='):
                # 'last' was a key; the next token is its value.
                kvChallenge = True
                challenge[last] = tokenList.pop(0)
                last = None

            elif token == Token(','):
                if kvChallenge:
                    # A comma inside a k=v challenge only ends the challenge
                    # when what follows doesn't look like another k=v pair
                    # (i.e. the start of a new scheme).
                    if len(tokenList) > 1 and tokenList[1] != Token('='):
                        break

                else:
                    break

            else:
                last = token

        if last and scheme and not challenge and not kvChallenge:
            # Scheme followed by a single bare token (e.g. "Basic realm-less").
            challenge = last
            last = None

        headers.append((scheme, challenge))

    if last and last not in (Token('='), Token(',')):
        # A trailing token after the final challenge is a scheme with an
        # empty challenge.
        if headers[-1] == (scheme, challenge):
            scheme = last
            challenge = {}
            headers.append((scheme, challenge))

    return headers
|
670 |
|
def parseAuthorization(header):
    """Split 'Scheme credentials' into (lowercased scheme, raw credentials).

    This header isn't tokenized because tokenizing could eat characters
    in the unquoted base64 encoded credentials.
    """
    scheme, rest = header.split(' ', 1)
    return scheme.lower(), rest
|
676 |
|
677 #### Header generators |
|
def generateAccept(accept):
    """Render a parsed Accept entry -- a (MimeType, qvalue) pair -- as a string."""
    mimeType, q = accept

    out = "%s/%s" % (mimeType.mediaType, mimeType.mediaSubtype)
    if mimeType.params:
        # .items() instead of Python 2-only .iteritems().
        out += ';' + generateKeyValues(mimeType.params.items())

    if q != 1.0:
        # %.3f then strip trailing zeros and dot: 0.500 -> 0.5
        out += (';q=%.3f' % (q,)).rstrip('0').rstrip('.')

    return out
|
689 |
|
def removeDefaultEncoding(seq):
    """Yield entries from seq, dropping the implicit identity;q=.0001 entry
    that addDefaultEncoding injects."""
    isInjectedDefault = lambda kv: kv[0] == 'identity' and kv[1] == .0001
    for item in seq:
        if not isInjectedDefault(item):
            yield item
|
694 |
|
def generateAcceptQvalue(keyvalue):
    """Render an (item, qvalue) pair; a qvalue of 1.0 is implicit and omitted."""
    item, q = keyvalue
    if q == 1.0:
        return "%s" % (item,)
    return ("%s;q=%.3f" % (item, q)).rstrip('0').rstrip('.')
|
700 |
|
def parseCacheControl(kv):
    """Parse one tokenized Cache-Control directive into a (key, value) pair."""
    k, v = parseKeyValue(kv)
    if k in ('max-age', 'min-fresh', 's-maxage'):
        # Required integer argument; treat a missing one as 0.
        if v is None:
            v = 0
        else:
            v = int(v)
    elif k == 'max-stale':
        # Optional integer argument
        if v is not None:
            v = int(v)
    elif k in ('private', 'no-cache'):
        # Optional list argument (header names)
        if v is not None:
            v = [field.strip().lower() for field in v.split(',')]
    return k, v
|
718 |
|
def generateCacheControl(kv):
    """Render one parsed Cache-Control directive, a (key, value) pair, as a string.

    (Previously declared with a Python 2 tuple parameter ``((k, v))``, which
    is a SyntaxError on Python 3; the unpacking is now explicit.  Callers
    still pass a single pair, so the interface is unchanged.)
    """
    k, v = kv
    if v is None:
        return str(k)
    else:
        if k == 'no-cache' or k == 'private':
            # quoted list of values
            v = quoteString(generateList(
                [header_case_mapping.get(name) or dashCapitalize(name) for name in v]))
        return '%s=%s' % (k, v)
|
728 |
|
def generateContentRange(tup):
    """tup is (type, start, end, len)
    len can be None ('*'); start and end may both be None ('*').
    """
    rangeType, start, end, length = tup
    if length is None:
        lengthStr = '*'
    else:
        lengthStr = int(length)
    if start is None and end is None:
        startend = '*'
    else:
        startend = '%d-%d' % (start, end)

    return '%s %s/%s' % (rangeType, startend, lengthStr)
|
744 |
|
def generateDateTime(secSinceEpoch):
    """Convert seconds since epoch to HTTP datetime string (RFC 1123 format)."""
    tm = time.gmtime(secSinceEpoch)
    # struct_time indices: 0=year, 1=month, 2=mday, 3=hour, 4=min, 5=sec, 6=wday
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        weekdayname[tm[6]], tm[2], monthname[tm[1]], tm[0],
        tm[3], tm[4], tm[5])
|
753 |
|
def generateExpect(item):
    """Render a parsed Expect entry (key, (value, extension-params...)) as a string."""
    key, rest = item
    if rest[0] is None:
        out = '%s' % (key,)
    else:
        out = '%s=%s' % (key, rest[0])
    if len(rest) > 1:
        out += ';' + generateKeyValues(rest[1:])
    return out
|
762 |
|
def generateRange(range):
    """Render (unit, [(start, end), ...]) as a Range value like 'bytes=0-499,500-'."""
    def noneOr(s):
        # None renders as empty ('-500' / '9500-' half-open specs).
        if s is None:
            return ''
        return s

    unit, ranges = range

    if unit != 'bytes':
        raise ValueError("Unknown range unit: "+unit+".")

    specs = ['%s-%s' % (noneOr(first), noneOr(last))
             for first, last in ranges]
    return unit + '=' + ','.join(specs)
|
777 |
|
def generateRetryAfter(when):
    """Render a Retry-After value (absolute seconds since epoch) as delta seconds."""
    # always generate delta seconds format
    return str(int(when - time.time()))
|
781 |
|
def generateContentType(mimeType):
    """Render a MimeType (mediaType/mediaSubtype plus params) as a Content-Type value."""
    out = "%s/%s" % (mimeType.mediaType, mimeType.mediaSubtype)
    if mimeType.params:
        # .items() instead of Python 2-only .iteritems().
        out += ';' + generateKeyValues(mimeType.params.items())
    return out
|
787 |
|
def generateIfRange(dateOrETag):
    """Render an If-Range value: an ETag as a quoted tag, anything else as an HTTP date."""
    if isinstance(dateOrETag, ETag):
        return dateOrETag.generate()
    return generateDateTime(dateOrETag)
|
793 |
|
794 # WWW-Authenticate and Authorization |
|
795 |
|
def generateWWWAuthenticate(headers):
    """Render parsed (scheme, challenge) pairs as WWW-Authenticate strings.

    challenge is normally a dict of auth-params; anything dict() cannot
    convert is emitted verbatim after the scheme.
    """
    _generated = []
    for seq in headers:
        scheme, challenge = seq[0], seq[1]

        # If we're going to parse out to something other than a dict
        # we need to be able to generate from something other than a dict

        try:
            l = []
            # .items() instead of Python 2-only .iteritems().
            for k, v in dict(challenge).items():
                l.append("%s=%s" % (k, quoteString(v)))

            _generated.append("%s %s" % (scheme, ", ".join(l)))
        except ValueError:
            _generated.append("%s %s" % (scheme, challenge))

    return _generated
|
814 |
|
def generateAuthorization(seq):
    """Join (scheme, credentials) with a space, wrapped as a one-header list."""
    return [" ".join(seq)]
|
817 |
|
818 |
|
819 #### |
|
class ETag(object):
    """An HTTP entity tag (RFC 2616 section 3.11), optionally weak."""

    def __init__(self, tag, weak=False):
        self.tag = str(tag)   # the opaque validator string, without quotes
        self.weak = weak      # True for weak validators (serialized 'W/"..."')

    def match(self, other, strongCompare):
        # Sec 13.3.
        # The strong comparison function: in order to be considered equal, both
        # validators MUST be identical in every way, and both MUST NOT be weak.
        #
        # The weak comparison function: in order to be considered equal, both
        # validators MUST be identical in every way, but either or both of
        # them MAY be tagged as "weak" without affecting the result.

        if not isinstance(other, ETag) or other.tag != self.tag:
            return False

        if strongCompare and (other.weak or self.weak):
            return False
        return True

    def __eq__(self, other):
        return isinstance(other, ETag) and other.tag == self.tag and other.weak == self.weak

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        # Bug fix: defining __eq__ without __hash__ left equal ETags free to
        # hash differently (and makes the class unhashable on Python 3).
        return hash((self.tag, self.weak))

    def __repr__(self):
        return "Etag(%r, weak=%r)" % (self.tag, self.weak)

    @staticmethod
    def parse(tokens):
        """Parse a tokenized ETag; a 'w/"tag"' form produces a weak tag.

        @raise ValueError: if the token sequence is not a valid ETag.
        """
        tokens = tuple(tokens)
        if len(tokens) == 1 and not isinstance(tokens[0], Token):
            return ETag(tokens[0])

        if (len(tokens) == 3 and tokens[0] == "w"
            and tokens[1] == Token('/')):
            return ETag(tokens[2], weak=True)

        raise ValueError("Invalid ETag.")

    def generate(self):
        if self.weak:
            return 'W/' + quoteString(self.tag)
        else:
            return quoteString(self.tag)
|
868 |
|
def parseStarOrETag(tokens):
    """Parse If-Match/If-None-Match content: either the literal '*' or an ETag."""
    tokens = tuple(tokens)
    if tokens == ('*',):
        return '*'
    return ETag.parse(tokens)
|
875 |
|
def generateStarOrETag(etag):
    """Render an If-Match/If-None-Match value: '*' as-is, otherwise the ETag's form."""
    if etag == '*':
        return etag
    return etag.generate()
|
881 |
|
882 #### Cookies. Blech! |
|
class Cookie(object):
    """A single HTTP cookie, covering both Netscape-style (version 0) and
    RFC 2965 (version 1) attributes.
    """
    # __slots__ = ['name', 'value', 'path', 'domain', 'ports', 'expires', 'discard', 'secure', 'comment', 'commenturl', 'version']

    def __init__(self, name, value, path=None, domain=None, ports=None, expires=None, discard=False, secure=False, comment=None, commenturl=None, version=0):
        self.name = name
        self.value = value
        self.path = path
        self.domain = domain
        self.ports = ports
        self.expires = expires
        self.discard = discard
        self.secure = secure
        self.comment = comment
        self.commenturl = commenturl
        self.version = version

    def __repr__(self):
        s = "Cookie(%r=%r" % (self.name, self.value)
        if self.path is not None: s += ", path=%r" % (self.path,)
        if self.domain is not None: s += ", domain=%r" % (self.domain,)
        if self.ports is not None: s += ", ports=%r" % (self.ports,)
        if self.expires is not None: s += ", expires=%r" % (self.expires,)
        if self.secure is not False: s += ", secure=%r" % (self.secure,)
        if self.comment is not None: s += ", comment=%r" % (self.comment,)
        if self.commenturl is not None: s += ", commenturl=%r" % (self.commenturl,)
        if self.version != 0: s += ", version=%r" % (self.version,)
        s += ")"
        return s

    def __eq__(self, other):
        # Bug fix: the original comparison omitted name and value, so any two
        # cookies with the same attributes compared equal regardless of what
        # they were actually named or set to.
        # NOTE(review): 'discard' is still not compared; confirm whether that
        # omission is intentional before adding it.
        return (isinstance(other, Cookie) and
                other.name == self.name and
                other.value == self.value and
                other.path == self.path and
                other.domain == self.domain and
                other.ports == self.ports and
                other.expires == self.expires and
                other.secure == self.secure and
                other.comment == self.comment and
                other.commenturl == self.commenturl and
                other.version == self.version)

    def __ne__(self, other):
        return not self.__eq__(other)
|
925 |
|
926 |
|
def parseCookie(headers):
    """Bleargh, the cookie spec sucks.
    This surely needs interoperability testing.
    There are two specs that are supported:
    Version 0) http://wp.netscape.com/newsref/std/cookie_spec.html
    Version 1) http://www.faqs.org/rfcs/rfc2965.html

    @param headers: list of raw Cookie header value strings
    @return: list of L{Cookie}
    """

    cookies = []
    # There can't really be multiple cookie headers according to RFC, because
    # if multiple headers are allowed, they must be joinable with ",".
    # Neither new RFC2965 cookies nor old netscape cookies are.

    header = ';'.join(headers)
    if header[0:8].lower() == "$version":
        # RFC2965 cookie: tokenize (without case folding, since cookie
        # values are case-sensitive) and split on ',' then ';'.
        h = tokenize([header], foldCase=False)
        r_cookies = split(h, Token(','))
        for r_cookie in r_cookies:
            last_cookie = None
            rr_cookies = split(r_cookie, Token(';'))
            for cookie in rr_cookies:
                nameval = tuple(split(cookie, Token('=')))
                if len(nameval) == 2:
                    (name,), (value,) = nameval
                else:
                    (name,), = nameval
                    value = None

                # Attribute *names* are case-insensitive; values are not.
                name = name.lower()
                if name == '$version':
                    continue
                if name[0] == '$':
                    # '$'-prefixed entries are attributes of the preceding
                    # cookie, not cookies themselves.
                    if last_cookie is not None:
                        if name == '$path':
                            last_cookie.path = value
                        elif name == '$domain':
                            last_cookie.domain = value
                        elif name == '$port':
                            if value is None:
                                last_cookie.ports = ()
                            else:
                                last_cookie.ports = tuple([int(s) for s in value.split(',')])
                else:
                    last_cookie = Cookie(name, value, version=1)
                    cookies.append(last_cookie)
    else:
        # Oldstyle cookies don't do quoted strings or anything sensible.
        # All characters are valid for names except ';' and '=', and all
        # characters are valid for values except ';'. Spaces are stripped,
        # however.
        r_cookies = header.split(';')
        for r_cookie in r_cookies:
            name, value = r_cookie.split('=', 1)
            name = name.strip(' \t')
            value = value.strip(' \t')

            cookies.append(Cookie(name, value))

    return cookies
|
987 |
|
# A valid cookie name: any run of characters that are neither HTTP separators
# nor control characters (anchored to the end of the string).
cookie_validname = "[^"+re.escape(http_tokens+http_ctls)+"]*$"
cookie_validname_re = re.compile(cookie_validname)
# A valid cookie value: either a valid name or a quoted string (with
# backslash-escaped quotes allowed inside).
cookie_validvalue = cookie_validname+'|"([^"]|\\\\")*"$'
cookie_validvalue_re = re.compile(cookie_validvalue)
|
992 |
|
def generateCookie(cookies):
    """Generate the value of a single ``Cookie`` request header from a
    list of Cookie objects.

    If every cookie has version 0, plain Netscape-style ``name=value``
    pairs are produced.  Otherwise everything is emitted using V1
    (RFC 2965) syntax, which is lossy for some V0 cookies — see the
    discussion below.

    @param cookies: a non-empty list of Cookie objects.
    @return: the complete header value as a single string.
    """
    # There's a fundamental problem with the two cookie specifications.
    # They both use the "Cookie" header, and the RFC Cookie header only allows
    # one version to be specified. Thus, when you have a collection of V0 and
    # V1 cookies, you have to either send them all as V0 or send them all as
    # V1.

    # I choose to send them all as V1.

    # You might think converting a V0 cookie to a V1 cookie would be lossless,
    # but you'd be wrong. If you do the conversion, and a V0 parser tries to
    # read the cookie, it will see a modified form of the cookie, in cases
    # where quotes must be added to conform to proper V1 syntax.
    # (as a real example: "Cookie: cartcontents=oid:94680,qty:1,auto:0,esp:y")

    # However, that is what we will do, anyways. It has a high probability of
    # breaking applications that only handle oldstyle cookies, where some other
    # application set a newstyle cookie that is applicable over for site
    # (or host), AND where the oldstyle cookie uses a value which is invalid
    # syntax in a newstyle cookie.

    # Also, the cookie name *cannot* be quoted in V1, so some cookies just
    # cannot be converted at all. (e.g. "Cookie: phpAds_capAd[32]=2"). These
    # are just discarded during conversion.

    # As this is an unsolvable problem, I will pretend I can just say
    # OH WELL, don't do that, or else upgrade your old applications to have
    # newstyle cookie parsers.

    # I will note offhandedly that there are *many* sites which send V0 cookies
    # that are not valid V1 cookie syntax. About 20% for my cookies file.
    # However, they do not generally mix them with V1 cookies, so this isn't
    # an issue, at least right now. I have not tested to see how many of those
    # webapps support RFC2965 V1 cookies. I suspect not many.

    max_version = max([cookie.version for cookie in cookies])

    if max_version == 0:
        # no quoting or anything.
        return ';'.join(["%s=%s" % (cookie.name, cookie.value) for cookie in cookies])
    else:
        str_cookies = ['$Version="1"']
        for cookie in cookies:
            if cookie.version == 0:
                # Version 0 cookie: we make sure the name and value are valid
                # V1 syntax.

                # If they are, we use them as is. This means in *most* cases,
                # the cookie will look literally the same on output as it did
                # on input.
                # If it isn't a valid name, ignore the cookie.
                # If it isn't a valid value, quote it and hope for the best on
                # the other side.

                if cookie_validname_re.match(cookie.name) is None:
                    continue

                value = cookie.value
                if cookie_validvalue_re.match(cookie.value) is None:
                    value = quoteString(value)

                str_cookies.append("%s=%s" % (cookie.name, value))
            else:
                # V1 cookie, nice and easy
                str_cookies.append("%s=%s" % (cookie.name, quoteString(cookie.value)))

            # Cookie attributes are emitted for both versions; they only
            # exist on cookies parsed from V1 (RFC 2965) headers.
            if cookie.path:
                str_cookies.append("$Path=%s" % quoteString(cookie.path))
            if cookie.domain:
                str_cookies.append("$Domain=%s" % quoteString(cookie.domain))
            if cookie.ports is not None:
                if len(cookie.ports) == 0:
                    # Bare $Port: cookie restricted to the request port.
                    str_cookies.append("$Port")
                else:
                    str_cookies.append("$Port=%s" % quoteString(",".join([str(x) for x in cookie.ports])))
        return ';'.join(str_cookies)
|
1069 |
|
def parseSetCookie(headers):
    """Parse old-style (Netscape) Set-Cookie header strings.

    @param headers: a list of raw Set-Cookie header values.
    @return: a list of Cookie objects; headers that fail to parse are
        silently skipped so one bad cookie cannot poison the rest.
    """
    setCookies = []
    for header in headers:
        try:
            pairs = []
            for fragment in header.split(';'):
                pieces = fragment.split('=', 1)
                key = pieces[0].strip(' \t')
                if len(pieces) == 2:
                    pairs.append((key, pieces[1].strip(' \t')))
                else:
                    # Attribute with no value (e.g. "secure").
                    pairs.append((key, None))
            setCookies.append(makeCookieFromList(pairs, True))
        except ValueError:
            # If we can't parse one Set-Cookie, ignore it,
            # but not the rest of Set-Cookies.
            pass
    return setCookies
|
1096 |
|
def parseSetCookie2(toks):
    """Parse a tokenized Set-Cookie2 (RFC 2965) header into Cookie objects.

    @param toks: the token stream for the header value.
    @return: a list of Cookie objects; cookies that fail to parse are
        silently skipped.
    """
    outCookies = []
    for cookieToks in split(toks, Token(',')):
        avPairs = [parseKeyValue(av) for av in split(cookieToks, Token(';'))]
        try:
            outCookies.append(makeCookieFromList(avPairs, False))
        except ValueError:
            # Again, if we can't handle one cookie -- ignore it.
            pass
    return outCookies
|
1107 |
|
def makeCookieFromList(tup, netscapeFormat):
    """Build a Cookie from a sequence of (name, value) pairs.

    The first pair is the cookie's own name and value; the remaining
    pairs are cookie attributes (Path, Domain, Max-Age, ...).

    @param tup: sequence of (name, value) pairs; value may be None for
        valueless (boolean) attributes.
    @param netscapeFormat: if True, apply old-style quirks, i.e. strip
        surrounding quotes from Expires and Port attribute values.
    @raise ValueError: if the cookie is missing a name or value, or the
        name starts with '$' (reserved for attributes in RFC 2965).
    """
    name, value = tup[0]
    if name is None or value is None:
        raise ValueError("Cookie has missing name or value")
    if name.startswith("$"):
        raise ValueError("Invalid cookie name: %r, starts with '$'." % name)
    cookie = Cookie(name, value)
    # Max-Age takes precedence over Expires: once seen, later Expires
    # attributes are ignored.
    hadMaxAge = False

    for name, value in tup[1:]:
        name = name.lower()

        if value is None:
            if name in ("discard", "secure"):
                # Boolean attrs
                value = True
            elif name != "port":
                # Can be either boolean or explicit
                continue

        if name in ("comment", "commenturl", "discard", "domain", "path", "secure"):
            # simple cases: attribute maps directly to a Cookie field.
            setattr(cookie, name, value)
        elif name == "expires" and not hadMaxAge:
            if netscapeFormat and value[0] == '"' and value[-1] == '"':
                value = value[1:-1]
            cookie.expires = parseDateTime(value)
        elif name == "max-age":
            hadMaxAge = True
            # Normalize relative Max-Age to an absolute expiry timestamp.
            cookie.expires = int(value) + time.time()
        elif name == "port":
            if value is None:
                # Valueless Port: cookie restricted to the request port.
                cookie.ports = ()
            else:
                if netscapeFormat and value[0] == '"' and value[-1] == '"':
                    value = value[1:-1]
                cookie.ports = tuple([int(s) for s in value.split(',')])
        elif name == "version":
            cookie.version = int(value)

    return cookie
|
1149 |
|
1150 |
|
def generateSetCookie(cookies):
    """Render cookies as old-style (Netscape) Set-Cookie header strings.

    @param cookies: a list of Cookie objects.
    @return: a list of header value strings, one per cookie.
    """
    def render(cookie):
        # Name/value first, then only the attributes that are set.
        parts = ["%s=%s" % (cookie.name, cookie.value)]
        if cookie.expires:
            parts.append("expires=%s" % generateDateTime(cookie.expires))
        if cookie.path:
            parts.append("path=%s" % cookie.path)
        if cookie.domain:
            parts.append("domain=%s" % cookie.domain)
        if cookie.secure:
            parts.append("secure")
        return '; '.join(parts)

    return [render(cookie) for cookie in cookies]
|
1166 |
|
def generateSetCookie2(cookies):
    """Render cookies as RFC 2965 Set-Cookie2 header strings.

    Values and attribute values are emitted as quoted-strings, and the
    mandatory ``Version="1"`` attribute is appended to each cookie.

    @param cookies: a list of Cookie objects.
    @return: a list of header value strings, one per cookie.
    """
    setCookies = []
    for cookie in cookies:
        out = ["%s=%s" % (cookie.name, quoteString(cookie.value))]
        if cookie.comment:
            out.append("Comment=%s" % quoteString(cookie.comment))
        if cookie.commenturl:
            out.append("CommentURL=%s" % quoteString(cookie.commenturl))
        if cookie.discard:
            out.append("Discard")
        if cookie.domain:
            out.append("Domain=%s" % quoteString(cookie.domain))
        if cookie.expires:
            # cookie.expires is an absolute timestamp; Max-Age is
            # delta-seconds and must be an integer (the original code
            # emitted a float here, e.g. "Max-Age=3599.98").
            out.append("Max-Age=%d" % int(cookie.expires - time.time()))
        if cookie.path:
            out.append("Path=%s" % quoteString(cookie.path))
        if cookie.ports is not None:
            if len(cookie.ports) == 0:
                # Bare Port attribute: restrict cookie to the request port.
                out.append("Port")
            else:
                out.append("Port=%s" % quoteString(",".join([str(x) for x in cookie.ports])))
        if cookie.secure:
            out.append("Secure")
        out.append('Version="1"')
        setCookies.append('; '.join(out))
    return setCookies
|
1193 |
|
def parseDepth(depth):
    """Validate a WebDAV Depth header value.

    @param depth: the raw header value.
    @return: the value unchanged, if it is one of "0", "1", "infinity".
    @raise ValueError: for any other value.
    """
    if depth in ("0", "1", "infinity"):
        return depth
    raise ValueError("Invalid depth header value: %s" % (depth,))
|
1198 |
|
def parseOverWrite(overwrite):
    """Parse a WebDAV Overwrite header value: "T" -> True, "F" -> False.

    @raise ValueError: for any other value.
    """
    try:
        return {"F": False, "T": True}[overwrite]
    except KeyError:
        raise ValueError("Invalid overwrite header value: %s" % (overwrite,))
|
1205 |
|
def generateOverWrite(overwrite):
    """Render a truth value as a WebDAV Overwrite header value."""
    return "T" if overwrite else "F"
|
1211 |
|
1212 ##### Random stuff that looks useful. |
|
1213 # def sortMimeQuality(s): |
|
1214 # def sorter(item1, item2): |
|
1215 # if item1[0] == '*': |
|
1216 # if item2[0] == '*': |
|
1217 # return 0 |
|
1218 |
|
1219 |
|
1220 # def sortQuality(s): |
|
1221 # def sorter(item1, item2): |
|
1222 # if item1[1] < item2[1]: |
|
1223 # return -1 |
|
1224 # if item1[1] < item2[1]: |
|
1225 # return 1 |
|
1226 # if item1[0] == item2[0]: |
|
1227 # return 0 |
|
1228 |
|
1229 |
|
1230 # def getMimeQuality(mimeType, accepts): |
|
1231 # type,args = parseArgs(mimeType) |
|
1232 # type=type.split(Token('/')) |
|
1233 # if len(type) != 2: |
|
1234 # raise ValueError, "MIME Type "+s+" invalid." |
|
1235 |
|
1236 # for accept in accepts: |
|
1237 # accept,acceptQual=accept |
|
1238 # acceptType=accept[0:1] |
|
1239 # acceptArgs=accept[2] |
|
1240 |
|
1241 # if ((acceptType == type or acceptType == (type[0],'*') or acceptType==('*','*')) and |
|
1242 # (args == acceptArgs or len(acceptArgs) == 0)): |
|
1243 # return acceptQual |
|
1244 |
|
1245 # def getQuality(type, accepts): |
|
1246 # qual = accepts.get(type) |
|
1247 # if qual is not None: |
|
1248 # return qual |
|
1249 |
|
1250 # return accepts.get('*') |
|
1251 |
|
1252 # Headers object |
|
# Sentinel type marking a header entry whose other representation (raw or
# parsed) is stale and must be recomputed on next access; see Headers below.
class __RecalcNeeded(object):
    def __repr__(self):
        return "<RecalcNeeded>"

# The single shared sentinel instance; always compared with 'is'.
_RecalcNeeded = __RecalcNeeded()
|
1258 |
|
class Headers(object):
    """This class stores the HTTP headers as both a parsed representation and
    the raw string representation. It converts between the two on demand.

    Either representation may be stale, in which case the slot holds the
    _RecalcNeeded sentinel and the other representation is converted via
    the handler on first access.
    """

    def __init__(self, headers=None, rawHeaders=None, handler=DefaultHTTPHandler):
        """
        @param headers: optional initial parsed headers, as a dict of
            header name to parsed value.
        @param rawHeaders: optional initial raw headers, as a dict of
            header name to list of strings.
        @param handler: the HeaderHandler used to convert between raw
            and parsed representations.
        """
        self._raw_headers = {}
        self._headers = {}
        self.handler = handler
        if headers is not None:
            # .items() works on both Python 2 and 3 (iteritems does not).
            for key, value in headers.items():
                self.setHeader(key, value)
        if rawHeaders is not None:
            for key, value in rawHeaders.items():
                self.setRawHeaders(key, value)

    def _setRawHeaders(self, headers):
        """Replace all raw headers at once, discarding any parsed state."""
        self._raw_headers = headers
        self._headers = {}

    def _toParsed(self, name):
        """Parse the raw value of the named header and cache the result."""
        r = self._raw_headers.get(name, None)
        h = self.handler.parse(name, r)
        if h is not None:
            self._headers[name] = h
        return h

    def _toRaw(self, name):
        """Generate the raw value of the named header and cache the result."""
        h = self._headers.get(name, None)
        r = self.handler.generate(name, h)
        if r is not None:
            self._raw_headers[name] = r
        return r

    def hasHeader(self, name):
        """Does a header with the given name exist?"""
        name = name.lower()
        # dict.has_key was removed in Python 3; 'in' is equivalent.
        return name in self._raw_headers

    def getRawHeaders(self, name, default=None):
        """Returns a list of headers matching the given name as the raw string given."""
        name = name.lower()
        raw_header = self._raw_headers.get(name, default)
        if raw_header is not _RecalcNeeded:
            return raw_header

        return self._toRaw(name)

    def getHeader(self, name, default=None):
        """Returns the parsed representation of the given header.
        The exact form of the return value depends on the header in question.

        If no parser for the header exists, raise ValueError.

        If the header doesn't exist, return default (or None if not specified)
        """
        name = name.lower()
        parsed = self._headers.get(name, default)
        if parsed is not _RecalcNeeded:
            return parsed
        return self._toParsed(name)

    def setRawHeaders(self, name, value):
        """Sets the raw representation of the given header.
        Value should be a list of strings, each being one header of the
        given name.
        """
        name = name.lower()
        self._raw_headers[name] = value
        # Parsed form is now stale; recompute lazily on next getHeader.
        self._headers[name] = _RecalcNeeded

    def setHeader(self, name, value):
        """Sets the parsed representation of the given header.
        Value should be a list of objects whose exact form depends
        on the header in question.
        """
        name = name.lower()
        # Raw form is now stale; recompute lazily on next getRawHeaders.
        self._raw_headers[name] = _RecalcNeeded
        self._headers[name] = value

    def addRawHeader(self, name, value):
        """
        Add a raw value to a header that may or may not already exist.
        If it exists, add it as a separate header to output; do not
        replace anything.
        """
        name = name.lower()
        raw_header = self._raw_headers.get(name)
        if raw_header is None:
            # No header yet
            raw_header = []
            self._raw_headers[name] = raw_header
        elif raw_header is _RecalcNeeded:
            # Materialize the raw list before appending to it.
            raw_header = self._toRaw(name)

        raw_header.append(value)
        self._headers[name] = _RecalcNeeded

    def removeHeader(self, name):
        """Removes the header named."""
        name = name.lower()
        if name in self._raw_headers:
            del self._raw_headers[name]
            del self._headers[name]

    def __repr__(self):
        return '<Headers: Raw: %s Parsed: %s>'% (self._raw_headers, self._headers)

    def canonicalNameCaps(self, name):
        """Return the name with the canonical capitalization, if known,
        otherwise, Caps-After-Dashes"""
        return header_case_mapping.get(name) or dashCapitalize(name)

    def getAllRawHeaders(self):
        """Return an iterator of key,value pairs of all headers
        contained in this object, as strings. The keys are capitalized
        in canonical capitalization."""
        for k, v in self._raw_headers.items():
            if v is _RecalcNeeded:
                v = self._toRaw(k)
            yield self.canonicalNameCaps(k), v

    def makeImmutable(self):
        """Make this header set immutable. All mutating operations will
        raise an exception."""
        # Shadow the mutators on the instance so further calls raise.
        self.setHeader = self.setRawHeaders = self.removeHeader = self._mutateRaise

    def _mutateRaise(self, *args):
        raise AttributeError("This header object is immutable as the headers have already been sent.")
|
1389 |
|
1390 |
|
1391 """The following dicts are all mappings of header to list of operations |
|
1392 to perform. The first operation should generally be 'tokenize' if the |
|
1393 header can be parsed according to the normal tokenization rules. If |
|
1394 it cannot, generally the first thing you want to do is take only the |
|
1395 last instance of the header (in case it was sent multiple times, which |
|
1396 is strictly an error, but we're nice.). |
|
1397 """ |
|
1398 |
|
1399 iteritems = lambda x: x.iteritems() |
|
1400 |
|
1401 |
|
# General headers (applicable to both requests and responses).  Each value
# is a pipeline of operations applied in order to the raw header value(s);
# commented-out entries are unimplemented.
parser_general_headers = {
    'Cache-Control':(tokenize, listParser(parseCacheControl), dict),
    'Connection':(tokenize,filterTokens),
    'Date':(last,parseDateTime),
#    'Pragma':tokenize
#    'Trailer':tokenize
    'Transfer-Encoding':(tokenize,filterTokens),
#    'Upgrade':tokenize
#    'Via':tokenize,stripComment
#    'Warning':tokenize
}

# Inverse pipelines: convert the parsed representation back to raw
# header strings.
generator_general_headers = {
    'Cache-Control':(iteritems, listGenerator(generateCacheControl), singleHeader),
    'Connection':(generateList,singleHeader),
    'Date':(generateDateTime,singleHeader),
#    'Pragma':
#    'Trailer':
    'Transfer-Encoding':(generateList,singleHeader),
#    'Upgrade':
#    'Via':
#    'Warning':
}
|
1425 |
|
# Request-only headers: raw-string -> parsed-value pipelines.
parser_request_headers = {
    'Accept': (tokenize, listParser(parseAccept), dict),
    'Accept-Charset': (tokenize, listParser(parseAcceptQvalue), dict, addDefaultCharset),
    'Accept-Encoding':(tokenize, listParser(parseAcceptQvalue), dict, addDefaultEncoding),
    'Accept-Language':(tokenize, listParser(parseAcceptQvalue), dict),
    'Authorization': (last, parseAuthorization),
    # Cookie headers are not tokenizable; parseCookie consumes the raw list.
    'Cookie':(parseCookie,),
    'Expect':(tokenize, listParser(parseExpect), dict),
    'From':(last,),
    'Host':(last,),
    'If-Match':(tokenize, listParser(parseStarOrETag), list),
    'If-Modified-Since':(last, parseIfModifiedSince),
    'If-None-Match':(tokenize, listParser(parseStarOrETag), list),
    'If-Range':(parseIfRange,),
    'If-Unmodified-Since':(last,parseDateTime),
    'Max-Forwards':(last,int),
#    'Proxy-Authorization':str, # what is "credentials"
    'Range':(tokenize, parseRange),
    'Referer':(last,str), # TODO: URI object?
    'TE':(tokenize, listParser(parseAcceptQvalue), dict),
    'User-Agent':(last,str),
}

# Request-only headers: parsed-value -> raw-string pipelines.
generator_request_headers = {
    'Accept': (iteritems,listGenerator(generateAccept),singleHeader),
    'Accept-Charset': (iteritems, listGenerator(generateAcceptQvalue),singleHeader),
    'Accept-Encoding': (iteritems, removeDefaultEncoding, listGenerator(generateAcceptQvalue),singleHeader),
    'Accept-Language': (iteritems, listGenerator(generateAcceptQvalue),singleHeader),
    'Authorization': (generateAuthorization,), # what is "credentials"
    'Cookie':(generateCookie,singleHeader),
    'Expect':(iteritems, listGenerator(generateExpect), singleHeader),
    'From':(str,singleHeader),
    'Host':(str,singleHeader),
    'If-Match':(listGenerator(generateStarOrETag), singleHeader),
    'If-Modified-Since':(generateDateTime,singleHeader),
    'If-None-Match':(listGenerator(generateStarOrETag), singleHeader),
    'If-Range':(generateIfRange, singleHeader),
    'If-Unmodified-Since':(generateDateTime,singleHeader),
    'Max-Forwards':(str, singleHeader),
#    'Proxy-Authorization':str, # what is "credentials"
    'Range':(generateRange,singleHeader),
    'Referer':(str,singleHeader),
    'TE': (iteritems, listGenerator(generateAcceptQvalue),singleHeader),
    'User-Agent':(str,singleHeader),
}
|
1471 |
|
# Response-only headers: raw-string -> parsed-value pipelines.
parser_response_headers = {
    'Accept-Ranges':(tokenize, filterTokens),
    'Age':(last,int),
    'ETag':(tokenize, ETag.parse),
    'Location':(last,), # TODO: URI object?
#    'Proxy-Authenticate'
    'Retry-After':(last, parseRetryAfter),
    'Server':(last,),
    'Set-Cookie':(parseSetCookie,),
    'Set-Cookie2':(tokenize, parseSetCookie2),
    'Vary':(tokenize, filterTokens),
    # Auth challenges are case-sensitive, so tokenize without folding case.
    'WWW-Authenticate': (lambda h: tokenize(h, foldCase=False),
                         parseWWWAuthenticate,)
}

# Response-only headers: parsed-value -> raw-string pipelines.
generator_response_headers = {
    'Accept-Ranges':(generateList, singleHeader),
    'Age':(str, singleHeader),
    'ETag':(ETag.generate, singleHeader),
    'Location':(str, singleHeader),
#    'Proxy-Authenticate'
    'Retry-After':(generateRetryAfter, singleHeader),
    'Server':(str, singleHeader),
    # Set-Cookie generators already return a list of header strings,
    # so no singleHeader wrapper is applied.
    'Set-Cookie':(generateSetCookie,),
    'Set-Cookie2':(generateSetCookie2,),
    'Vary':(generateList, singleHeader),
    'WWW-Authenticate':(generateWWWAuthenticate,)
}
|
1500 |
|
# Entity headers (describe the message body): raw -> parsed pipelines.
parser_entity_headers = {
    # Method names and media types are case-sensitive: don't fold case.
    'Allow':(lambda str:tokenize(str, foldCase=False), filterTokens),
    'Content-Encoding':(tokenize, filterTokens),
    'Content-Language':(tokenize, filterTokens),
    'Content-Length':(last, int),
    'Content-Location':(last,), # TODO: URI object?
    'Content-MD5':(last, parseContentMD5),
    'Content-Range':(last, parseContentRange),
    'Content-Type':(lambda str:tokenize(str, foldCase=False), parseContentType),
    'Expires':(last, parseExpires),
    'Last-Modified':(last, parseDateTime),
    }

# Entity headers: parsed -> raw pipelines.
generator_entity_headers = {
    'Allow':(generateList, singleHeader),
    'Content-Encoding':(generateList, singleHeader),
    'Content-Language':(generateList, singleHeader),
    'Content-Length':(str, singleHeader),
    'Content-Location':(str, singleHeader),
    # NOTE(review): base64.encodestring was removed in Python 3.9; use
    # base64.encodebytes when porting this module to Python 3.
    'Content-MD5':(base64.encodestring, lambda x: x.strip("\n"), singleHeader),
    'Content-Range':(generateContentRange, singleHeader),
    'Content-Type':(generateContentType, singleHeader),
    'Expires':(generateDateTime, singleHeader),
    'Last-Modified':(generateDateTime, singleHeader),
    }
|
1526 |
|
# Register all of the standard header parser and generator pipelines on
# the module-wide DefaultHTTPHandler, so Headers instances created with
# the default handler understand every header defined above.
DefaultHTTPHandler.updateParsers(parser_general_headers)
DefaultHTTPHandler.updateParsers(parser_request_headers)
DefaultHTTPHandler.updateParsers(parser_response_headers)
DefaultHTTPHandler.updateParsers(parser_entity_headers)

DefaultHTTPHandler.updateGenerators(generator_general_headers)
DefaultHTTPHandler.updateGenerators(generator_request_headers)
DefaultHTTPHandler.updateGenerators(generator_response_headers)
DefaultHTTPHandler.updateGenerators(generator_entity_headers)
|
1536 |
|
1537 |
|
1538 # casemappingify(DefaultHTTPParsers) |
|
1539 # casemappingify(DefaultHTTPGenerators) |
|
1540 |
|
1541 # lowerify(DefaultHTTPParsers) |
|
1542 # lowerify(DefaultHTTPGenerators) |