1 # This file has been extracted from the abandoned TwistedWeb2 project |
|
2 # http://twistedmatrix.com/trac/wiki/TwistedWeb2 |
|
3 |
|
4 |
|
5 import time |
|
6 from calendar import timegm |
|
7 import base64 |
|
8 import re |
|
9 |
|
10 from six import string_types |
|
11 from six.moves.urllib.parse import urlparse |
|
12 |
|
13 |
|
def dashCapitalize(s):
    """Capitalize each dash-separated word in *s*, keeping the dashes.

    e.g. 'content-type' -> 'Content-Type'.
    """
    words = s.split('-')
    return '-'.join(word.capitalize() for word in words)
|
17 |
|
# datetime parsing and formatting

# RFC 1123 weekday abbreviations, indexed by time.gmtime()'s tm_wday (0=Mon).
weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
# Lowercased copies for case-insensitive matching while parsing.
weekdayname_lower = [name.lower() for name in weekdayname]
# Month abbreviations, 1-indexed (index 0 is None) so indices match tm_mon.
monthname = [None,
             'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
             'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
# `name and name.lower()` keeps the leading None placeholder intact.
monthname_lower = [name and name.lower() for name in monthname]
|
25 |
|
26 # HTTP Header parsing API |
|
27 |
|
# Maps lowercased header names to their canonical capitalization.
header_case_mapping = {}

def casemappingify(d):
    """Record the canonical (cased) spelling of each header name in *d*
    into the module-level header_case_mapping registry."""
    global header_case_mapping
    header_case_mapping.update(dict((key.lower(), key) for key in d))
|
34 |
|
def lowerify(d):
    """Return a copy of mapping *d* with every key lowercased."""
    return dict((key.lower(), value) for key, value in d.items())
|
37 |
|
38 |
|
class HeaderHandler(object):
    """HeaderHandler manages header generating and parsing functions.

    NOTE(review): HTTPParsers and HTTPGenerators are class-level dicts, and
    __init__ / updateParsers / updateGenerators mutate them in place — so
    every instance shares one registry. This looks intentional (the module
    exposes a single DefaultHTTPHandler instance) but confirm before
    creating multiple independent handlers.
    """
    # lowercased header name -> chain (sequence) of parser callables
    HTTPParsers = {}
    # lowercased header name -> chain (sequence) of generator callables
    HTTPGenerators = {}

    def __init__(self, parsers=None, generators=None):
        """
        @param parsers: A map of header names to parsing functions.
        @type parsers: L{dict}

        @param generators: A map of header names to generating functions.
        @type generators: L{dict}
        """

        if parsers:
            self.HTTPParsers.update(parsers)
        if generators:
            self.HTTPGenerators.update(generators)

    def parse(self, name, header):
        """
        Parse the given header based on its given name.

        @param name: The header name to parse.
        @type name: C{str}

        @param header: A list of unparsed headers.
        @type header: C{list} of C{str}

        @return: The return value is the parsed header representation,
            it is dependent on the header.  See the HTTP Headers document.
            Returns C{None} if any parser in the chain raised ValueError.
        """
        parser = self.HTTPParsers.get(name, None)
        if parser is None:
            raise ValueError("No header parser for header '%s', either add one or use getHeaderRaw." % (name,))

        try:
            # Each parser in the chain transforms the output of the previous.
            for p in parser:
                # print "==> Parsing %s: %s(%s)" % (name, repr(p), repr(header))
                header = p(header)
                # if isinstance(h, types.GeneratorType):
                #     h = list(h)
        except ValueError as v:
            # print v
            # Unparseable values are swallowed and represented as None.
            header = None

        return header

    def generate(self, name, header):
        """
        Generate the given header based on its given name.

        @param name: The header name to generate.
        @type name: C{str}

        @param header: A parsed header, such as the output of
            L{HeaderHandler}.parse.

        @return: C{list} of C{str} each representing a generated HTTP header.
        """
        generator = self.HTTPGenerators.get(name, None)

        if generator is None:
            # print self.generators
            raise ValueError("No header generator for header '%s', either add one or use setHeaderRaw." % (name,))

        # Unlike parse(), generation errors propagate to the caller.
        for g in generator:
            header = g(header)

        # self._raw_headers[name] = h
        return header

    def updateParsers(self, parsers):
        """Update en masse the parser maps.

        Also records the canonical capitalization of each header name in
        the module-level header_case_mapping.

        @param parsers: Map of header names to parser chains.
        @type parsers: C{dict}
        """
        casemappingify(parsers)
        self.HTTPParsers.update(lowerify(parsers))

    def addParser(self, name, value):
        """Add an individual parser chain for the given header.

        @param name: Name of the header to add
        @type name: C{str}

        @param value: The parser chain
        @type value: C{str}
        """
        self.updateParsers({name: value})

    def updateGenerators(self, generators):
        """Update en masse the generator maps.

        @param generators: Map of header names to generator chains.
        @type generators: C{dict}
        """
        casemappingify(generators)
        self.HTTPGenerators.update(lowerify(generators))

    def addGenerators(self, name, value):
        """Add an individual generator chain for the given header.

        @param name: Name of the header to add
        @type name: C{str}

        @param value: The generator chain
        @type value: C{str}
        """
        self.updateGenerators({name: value})

    def update(self, parsers, generators):
        """Conveniently update parsers and generators all at once.
        """
        self.updateParsers(parsers)
        self.updateGenerators(generators)
|
157 |
|
158 |
|
# Module-wide handler instance; parser/generator registrations made elsewhere
# accumulate onto this shared object (HeaderHandler stores them at class level).
DefaultHTTPHandler = HeaderHandler()
|
160 |
|
161 |
|
162 ## HTTP DateTime parser |
|
def parseDateTime(dateString):
    """Convert an HTTP date string (one of three formats) to seconds since epoch.

    Supported formats (RFC 2616 section 3.3.1):
      1. Sun, 06 Nov 1994 08:49:37 GMT   (RFC 1123; preferred)
      2. Sunday, 06-Nov-94 08:49:37 GMT  (RFC 850; two-digit year)
      3. Sun Nov  6 08:49:37 1994        (ANSI C asctime())

    @param dateString: the date string to parse
    @return: seconds since the epoch, as C{int}
    @raise ValueError: if the string matches none of the formats
    """
    parts = dateString.split()

    if not parts[0][0:3].lower() in weekdayname_lower:
        # Weekday is stupid. Might have been omitted.
        try:
            return parseDateTime("Sun, "+dateString)
        except ValueError:
            # Guess not; fall through and try the formats below as-is.
            pass

    partlen = len(parts)
    if (partlen == 5 or partlen == 6) and parts[1].isdigit():
        # 1st date format: Sun, 06 Nov 1994 08:49:37 GMT
        # (Note: "GMT" is literal, not a variable timezone)
        # (also handles without "GMT")
        # This is the normal format
        day = parts[1]
        month = parts[2]
        year = parts[3]
        # renamed from `time` so the stdlib time module is not shadowed
        timePart = parts[4]
    elif (partlen == 3 or partlen == 4) and parts[1].find('-') != -1:
        # 2nd date format: Sunday, 06-Nov-94 08:49:37 GMT
        # (Note: "GMT" is literal, not a variable timezone)
        # (also handles without "GMT")
        # Two digit year, yucko.
        day, month, year = parts[1].split('-')
        timePart = parts[2]
        year = int(year)
        # Two-digit year window: 00-68 -> 20xx, 69-99 -> 19xx.
        if year < 69:
            year = year + 2000
        elif year < 100:
            year = year + 1900
    elif len(parts) == 5:
        # 3rd date format: Sun Nov  6 08:49:37 1994
        # ANSI C asctime() format.
        day = parts[2]
        month = parts[1]
        year = parts[4]
        timePart = parts[3]
    else:
        raise ValueError("Unknown datetime format %r" % dateString)

    day = int(day)
    # monthname_lower[0] is None, so Jan..Dec map to 1..12; an unknown month
    # raises ValueError from .index(), which is the desired failure mode.
    month = int(monthname_lower.index(month.lower()))
    year = int(year)
    # renamed from `min` so the builtin is not shadowed
    hour, minute, sec = map(int, timePart.split(':'))
    return int(timegm((year, month, day, hour, minute, sec)))
|
212 |
|
213 |
|
214 ##### HTTP tokenizer |
|
class Token(str):
    """An interned single-character HTTP separator token.

    A Token compares equal to the plain string it wraps (it is a str
    subclass), but its type lets parsers distinguish separators from
    ordinary parsed strings.  Instances are cached, so the same character
    always yields the same object.
    """
    __slots__ = []
    # Cache of previously-created tokens, keyed by character.
    tokens = {}

    def __new__(cls, char):
        # First positional argument of __new__ is the class; the original
        # misleadingly named it `self`.
        token = Token.tokens.get(char)
        if token is None:
            Token.tokens[char] = token = str.__new__(cls, char)
        return token

    def __repr__(self):
        return "Token(%s)" % str.__repr__(self)
|
226 |
|
227 |
|
# Separator characters from RFC 2616 section 2.2 (plus SP and HT);
# these delimit tokens when tokenizing a header value.
http_tokens = " \t\"()<>@,;:\\/[]?={}"
# Control characters (0x00-0x1F and DEL); illegal inside a header value.
http_ctls = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f\x7f"
|
230 |
|
def tokenize(header, foldCase=True):
    """Tokenize a string according to normal HTTP header parsing rules.

    In particular:
     - Whitespace is irrelevant and eaten next to special separator tokens.
       Its existance (but not amount) is important between character strings.
     - Quoted string support including embedded backslashes.
     - Case is insignificant (and thus lowercased), except in quoted strings.
        (unless foldCase=False)
     - Multiple headers are concatenated with ','

    NOTE: not all headers can be parsed with this function.

    Takes a raw header value (list of strings), and
    Returns a generator of strings and Token class instances.
    """
    tokens = http_tokens
    ctls = http_ctls

    # Multiple header lines are treated as one comma-joined value.
    string = ",".join(header)
    # NOTE(review): `list` is never used below (and shadows the builtin);
    # left in place to keep this block byte-identical.
    list = []
    start = 0          # start of the current un-yielded run of characters
    cur = 0            # index of the character currently being examined
    quoted = False     # inside a double-quoted string?
    qpair = False      # previous character was a backslash inside quotes?
    inSpaces = -1      # -1: after a separator; False: in a word; True: in spaces
    qstring = None     # accumulated quoted-string content so far

    for x in string:
        if quoted:
            if qpair:
                # The escaped character: flush up to (not including) the
                # backslash, then append the escaped character itself.
                qpair = False
                qstring = qstring+string[start:cur-1]+x
                start = cur+1
            elif x == '\\':
                qpair = True
            elif x == '"':
                # Closing quote: emit the accumulated quoted string verbatim
                # (no case folding inside quotes).
                quoted = False
                yield qstring+string[start:cur]
                qstring = None
                start = cur+1
        elif x in tokens:
            # Separator: first flush any pending word.
            if start != cur:
                if foldCase:
                    yield string[start:cur].lower()
                else:
                    yield string[start:cur]

            start = cur+1
            if x == '"':
                quoted = True
                qstring = ""
                inSpaces = False
            elif x in " \t":
                # Only note that spaces were seen; a Token(' ') is emitted
                # later iff a word follows (space between words is relevant).
                if inSpaces is False:
                    inSpaces = True
            else:
                inSpaces = -1
                yield Token(x)
        elif x in ctls:
            raise ValueError("Invalid control character: %d in header" % ord(x))
        else:
            # Ordinary character: if we crossed a run of spaces between two
            # words, represent it by a single Token(' ').
            if inSpaces is True:
                yield Token(' ')
                inSpaces = False

            inSpaces = False
        cur = cur+1

    if qpair:
        raise ValueError("Missing character after '\\'")
    if quoted:
        raise ValueError("Missing end quote")

    # Flush the final pending word, if any.
    if start != cur:
        if foldCase:
            yield string[start:cur].lower()
        else:
            yield string[start:cur]
|
310 |
|
def split(seq, delim):
    """The same as str.split but works on arbitrary sequences.
    Too bad it's not builtin to python!

    Yields lists of items between occurrences of *delim*; like str.split,
    leading/trailing/adjacent delimiters yield empty lists.
    """
    chunk = []
    for element in seq:
        if element != delim:
            chunk.append(element)
        else:
            yield chunk
            chunk = []
    yield chunk
|
323 |
|
324 # def find(seq, *args): |
|
325 # """The same as seq.index but returns -1 if not found, instead |
|
326 # Too bad it's not builtin to python!""" |
|
327 # try: |
|
328 # return seq.index(value, *args) |
|
329 # except ValueError: |
|
330 # return -1 |
|
331 |
|
332 |
|
def filterTokens(seq):
    """Filter out instances of Token, leaving only a list of strings.

    Used instead of a more specific parsing method (e.g. splitting on commas)
    when only strings are expected, so as to be a little lenient.

    Apache does it this way and has some comments about broken clients which
    forget commas (?), so I'm doing it the same way. It shouldn't
    hurt anything, in any case.
    """
    return [item for item in seq if not isinstance(item, Token)]
|
349 |
|
350 ##### parser utilities: |
|
def checkSingleToken(tokens):
    """Return the sole element of *tokens*; raise ValueError otherwise."""
    if len(tokens) == 1:
        return tokens[0]
    raise ValueError("Expected single token, not %s." % (tokens,))
|
355 |
|
def parseKeyValue(val):
    """Interpret a token list as a bare `key` or a `key = value` triple.

    @return: (key, value) where value is None for a bare key.
    @raise ValueError: for any other shape.
    """
    length = len(val)
    if length == 1:
        return val[0], None
    if length == 3 and val[1] == Token('='):
        return val[0], val[2]
    raise ValueError("Expected key or key=value, but got %s." % (val,))
|
362 |
|
def parseArgs(field):
    """Split *field* on ';' tokens into (value, [(key, value), ...])."""
    pieces = split(field, Token(';'))
    value = next(pieces)
    keyValues = [parseKeyValue(piece) for piece in pieces]
    return value, keyValues
|
368 |
|
def listParser(fun):
    """Return a function which applies 'fun' to every element in the
    comma-separated list (empty elements are skipped)."""
    def listParserHelper(tokens):
        for chunk in split(tokens, Token(',')):
            if chunk:
                yield fun(chunk)

    return listParserHelper
|
379 |
|
def last(seq):
    """Return seq[-1]"""
    return seq[len(seq) - 1]
|
383 |
|
def unique(seq):
    '''if seq is not a string, check it's a sequence of one element and return it'''
    if isinstance(seq, string_types):
        return seq
    if len(seq) == 1:
        return seq[0]
    raise ValueError('single value required, not %s' % seq)
|
391 |
|
def parseHTTPMethod(method):
    """Validate *method* against the RFC 2616 standard methods.

    Extension methods are rejected with ValueError; the stripped method
    name is returned otherwise.
    """
    method = method.strip()
    standardMethods = ("OPTIONS", "GET", "HEAD", "POST", "PUT", "DELETE",
                       "TRACE", "CONNECT")
    if method in standardMethods:
        return method
    raise ValueError('Unsupported HTTP method %s' % method)
|
399 |
|
def parseAllowOrigin(origin):
    """Ensure origin is a valid URL-base stuff, or null"""
    if origin == 'null':
        return origin
    parsed = urlparse(origin)
    hasExtras = parsed.params or parsed.query or parsed.username
    if hasExtras or parsed.path not in ('', '/'):
        raise ValueError('Incorrect Accept-Control-Allow-Origin value %s' % origin)
    if parsed.scheme not in ('http', 'https'):
        raise ValueError('Unsupported Accept-Control-Allow-Origin URL scheme %s' % origin)
    if not parsed.netloc:
        raise ValueError('Accept-Control-Allow-Origin: host name cannot be unset (%s)' % origin)
    return origin
|
412 |
|
def parseAllowCreds(cred):
    """Can be "true" """
    if cred:
        cred = cred.lower()
        if cred != 'true':
            raise ValueError('Accept-Control-Allow-Credentials can only be "true" (%s)' % cred)
    return cred
|
420 |
|
421 ##### Generation utilities |
|
def quoteString(s):
    """Wrap *s* in double quotes, backslash-escaping backslashes and quotes."""
    escaped = s.replace('\\', '\\\\').replace('"', '\\"')
    return '"' + escaped + '"'
|
424 |
|
def listGenerator(fun):
    """Return a function which applies 'fun' to every element in
    the given list, then joins the result with generateList"""
    def listGeneratorHelper(l):
        mapped = [fun(element) for element in l]
        return generateList(mapped)

    return listGeneratorHelper
|
432 |
|
def generateList(seq):
    """Join the string items of *seq* with the standard ', ' separator."""
    separator = ", "
    return separator.join(seq)
|
435 |
|
def singleHeader(item):
    """Wrap an already-generated header value as a one-element list
    (the shape HeaderHandler.generate chains expect)."""
    return [item]
|
438 |
|
def generateKeyValues(kvs):
    """Render (key, value) pairs as 'k1=v1;k2;k3=v3' (a None value emits
    the bare key)."""
    rendered = ['%s' % k if v is None else '%s=%s' % (k, v) for k, v in kvs]
    return ";".join(rendered)
|
448 |
|
def generateTrueFalse(value):
    """
    Return 'true' or 'false' depending on the value.

    * 'true' values are `True`, `1`, `"true"`
    * 'false' values are `False`, `0`, `"false"`

    """
    if value in (True, 1):
        return 'true'
    if value in (False, 0):
        return 'false'
    if isinstance(value, string_types):
        lowered = value.lower()
        if lowered == 'true':
            return 'true'
        if lowered == 'false':
            return 'false'
    raise ValueError("Invalid true/false header value: %s" % value)
|
464 |
|
class MimeType(object):
    """A parsed MIME media type: type, subtype and a dict of parameters."""

    @classmethod
    def fromString(klass, mimeTypeString):
        """Generate a MimeType object from the given string.

        @param mimeTypeString: The mimetype to parse

        @return: L{MimeType}
        """
        return DefaultHTTPHandler.parse('content-type', [mimeTypeString])

    def __init__(self, mediaType, mediaSubtype, params=None, **kwargs):
        """
        @type mediaType: C{str}

        @type mediaSubtype: C{str}

        @param params: mapping or sequence of (key, value) pairs; copied,
            never aliased.  (Was a mutable `{}` default — fixed.)
        @type params: C{dict}
        """
        self.mediaType = mediaType
        self.mediaSubtype = mediaSubtype
        self.params = dict(params) if params is not None else {}

        if kwargs:
            self.params.update(kwargs)

    def __eq__(self, other):
        if not isinstance(other, MimeType): return NotImplemented
        return (self.mediaType == other.mediaType and
                self.mediaSubtype == other.mediaSubtype and
                self.params == other.params)

    def __ne__(self, other):
        # Propagate NotImplemented instead of truth-testing it (the original
        # `not self.__eq__(other)` negated the NotImplemented singleton).
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __repr__(self):
        return "MimeType(%r, %r, %r)" % (self.mediaType, self.mediaSubtype, self.params)

    def __hash__(self):
        return hash(self.mediaType)^hash(self.mediaSubtype)^hash(tuple(self.params.items()))
|
505 |
|
506 ##### Specific header parsers. |
|
def parseAccept(field):
    """Parse one Accept header element into a (MimeType, qvalue) tuple."""
    type, args = parseArgs(field)

    # A media type must tokenize as exactly: type '/' subtype.
    if len(type) != 3 or type[1] != Token('/'):
        raise ValueError("MIME Type "+str(type)+" invalid.")

    # okay, this spec is screwy. A 'q' parameter is used as the separator
    # between MIME parameters and (as yet undefined) additional HTTP
    # parameters.

    # Find the first 'q' arg: everything before it belongs to the MIME type,
    # 'q' itself and everything after are accept-extension parameters.
    num = 0
    for arg in args:
        if arg[0] == 'q':
            mimeparams = tuple(args[0:num])
            params = args[num:]
            break
        num = num + 1
    else:
        # No 'q' parameter: all args are MIME parameters.
        mimeparams = tuple(args)
        params = []

    # Default values for parameters:
    qval = 1.0

    # Parse accept parameters:
    for param in params:
        if param[0] == 'q':
            qval = float(param[1])
        else:
            # Warn? ignored parameter.
            pass

    ret = MimeType(type[0], type[2], mimeparams), qval
    return ret
|
541 |
|
def parseAcceptQvalue(field):
    """Parse a token such as 'gzip;q=0.5' into ('gzip', 0.5).

    The qvalue defaults to 1.0 when no 'q' argument is present.
    """
    value, args = parseArgs(field)

    value = checkSingleToken(value)

    qvalue = 1.0  # Default qvalue is 1
    for argKey, argValue in args:
        if argKey == 'q':
            qvalue = float(argValue)
    return value, qvalue
|
552 |
|
def addDefaultCharset(charsets):
    """Apply RFC 2616's implicit 'iso-8859-1' entry to an Accept-Charset
    map, unless a wildcard or explicit entry already covers it."""
    hasWildcard = charsets.get('*') is not None
    hasLatin1 = charsets.get('iso-8859-1') is not None
    if not (hasWildcard or hasLatin1):
        charsets['iso-8859-1'] = 1.0
    return charsets
|
557 |
|
def addDefaultEncoding(encodings):
    """Apply the implicit 'identity' entry to an Accept-Encoding map."""
    explicit = (encodings.get('*') is not None or
                encodings.get('identity') is not None)
    if not explicit:
        # RFC doesn't specify a default value for identity, only that it
        # "is acceptable" if not mentioned. Thus, give it a very low qvalue.
        encodings['identity'] = .0001
    return encodings
|
564 |
|
565 |
|
def parseContentType(header):
    # Case folding is disabled for this header, because of use of
    # Content-Type: multipart/form-data; boundary=CaSeFuLsTuFf
    # So, we need to explicitly .lower() the type/subtype and arg keys.

    mediaValue, args = parseArgs(header)

    if len(mediaValue) != 3 or mediaValue[1] != Token('/'):
        raise ValueError("MIME Type "+str(mediaValue)+" invalid.")

    # Lowercase parameter names but leave parameter values untouched.
    loweredArgs = [(key.lower(), value) for key, value in args]

    return MimeType(mediaValue[0].lower(), mediaValue[2].lower(), tuple(loweredArgs))
|
579 |
|
def parseContentMD5(header):
    """Decode a base64 Content-MD5 header value to the raw digest bytes.

    @raise ValueError: if the value is not valid base64.
    """
    # base64.decodestring was removed in Python 3.9; use decodebytes when
    # available (Python 3), falling back to decodestring (Python 2).
    decode = getattr(base64, "decodebytes", None) or base64.decodestring
    try:
        if isinstance(header, str):
            # The decoder requires bytes on Python 3.
            header = header.encode('ascii')
        return decode(header)
    except Exception as e:
        raise ValueError(e)
|
585 |
|
def parseContentRange(header):
    """Parse a content-range header into (kind, start, end, realLength).

    realLength might be None if real length is not known ('*').
    start and end might be None if start, end unspecified (for response code 416)
    """
    kind, other = header.strip().split()
    if kind.lower() != "bytes":
        # Fix: the original never interpolated the kind into the message,
        # leaving a literal '%r' in the error text.
        raise ValueError("a range of type %r is not supported" % (kind,))
    startend, realLength = other.split("/")
    if startend.strip() == '*':
        start, end = None, None
    else:
        start, end = map(int, startend.split("-"))
    if realLength == "*":
        realLength = None
    else:
        realLength = int(realLength)
    return (kind, start, end, realLength)
|
605 |
|
def parseExpect(field):
    """Parse an Expect element into (name, (value, extra-kv-pairs...))."""
    nameTokens, args = parseArgs(field)

    name, value = parseKeyValue(nameTokens)
    return (name, (value,) + tuple(args))
|
611 |
|
def parseExpires(header):
    # """HTTP/1.1 clients and caches MUST treat other invalid date formats,
    # especially including the value 0, as in the past (i.e., "already expired").""""

    try:
        return parseDateTime(header)
    except ValueError:
        # Any unparseable Expires value is treated as already expired.
        return 0
|
620 |
|
def parseIfModifiedSince(header):
    # Ancient versions of netscape and *current* versions of MSIE send
    # If-Modified-Since: Thu, 05 Aug 2004 12:57:27 GMT; length=123
    # which is blantantly RFC-violating and not documented anywhere
    # except bug-trackers for web frameworks.

    # So, we'll just strip off everything after a ';'.
    datePart = header.split(';', 1)[0]
    return parseDateTime(datePart)
|
629 |
|
def parseIfRange(headers):
    """An If-Range value is either an entity tag or an HTTP date."""
    try:
        etagTokens = tokenize(headers)
        return ETag.parse(etagTokens)
    except ValueError:
        # Not an ETag; fall back to parsing it as a date.
        return parseDateTime(last(headers))
|
635 |
|
def parseRange(range):
    """Parse a tokenized Range header into ('bytes', [(start, end), ...]).

    start/end may be None for open-ended specs such as '500-' or '-500'.
    """
    range = list(range)
    if len(range) < 3 or range[1] != Token('='):
        raise ValueError("Invalid range header format: %s" %(range,))

    rangeType = range[0]
    if rangeType != 'bytes':
        raise ValueError("Unknown range unit: %s." % (rangeType,))

    ranges = []
    for byterangespec in split(range[2:], Token(',')):
        if len(byterangespec) != 1:
            raise ValueError("Invalid range header format: %s" % (range,))
        startText, endText = byterangespec[0].split('-')

        if not startText and not endText:
            raise ValueError("Invalid range header format: %s" % (range,))

        start = int(startText) if startText else None
        end = int(endText) if endText else None

        if start and end and start > end:
            raise ValueError("Invalid range header, start > end: %s" % (range,))
        ranges.append((start, end))

    return rangeType, ranges
|
669 |
|
def parseRetryAfter(header):
    """Retry-After is either delta-seconds or an HTTP date; return the
    absolute time as seconds since the epoch."""
    try:
        delta = int(header)
    except ValueError:
        # Not an integer: must be an HTTP datetime.
        return parseDateTime(header)
    return time.time() + delta
|
677 |
|
678 # WWW-Authenticate and Authorization |
|
679 |
|
def parseWWWAuthenticate(tokenized):
    """Parse a tokenized WWW-Authenticate header into a list of
    (scheme, challenge) tuples.  `challenge` is a dict of auth parameters,
    or — for a scheme followed by a single bare token — that token itself.
    """
    headers = []

    tokenList = list(tokenized)

    while tokenList:
        scheme = tokenList.pop(0)
        challenge = {}
        last = None
        kvChallenge = False

        while tokenList:
            token = tokenList.pop(0)
            if token == Token('='):
                # `last` was a parameter name; consume its value.
                kvChallenge = True
                challenge[last] = tokenList.pop(0)
                last = None

            elif token == Token(','):
                if kvChallenge:
                    # In a key=value challenge a comma only ends the
                    # challenge when what follows is NOT another key=value
                    # pair (i.e. it is the next scheme name).
                    if len(tokenList) > 1 and tokenList[1] != Token('='):
                        break

                else:
                    break

            else:
                last = token

        # A scheme followed by one bare token (e.g. "NTLM xyz"): use the
        # token itself as the challenge instead of a dict.
        if last and scheme and not challenge and not kvChallenge:
            challenge = last
            last = None

        headers.append((scheme, challenge))

    # NOTE(review): `last`/`scheme`/`challenge` are loop variables referenced
    # after the loop; if `tokenized` is empty this raises NameError.
    # Presumably callers never pass an empty token stream — confirm.
    if last and last not in (Token('='), Token(',')):
        # A dangling trailing token is treated as a scheme with no challenge.
        if headers[-1] == (scheme, challenge):
            scheme = last
            challenge = {}
            headers.append((scheme, challenge))

    return headers
|
722 |
|
def parseAuthorization(header):
    """Split an Authorization header into (lowercased scheme, credentials)."""
    # this header isn't tokenized because it may eat characters
    # in the unquoted base64 encoded credentials
    scheme, credentials = header.split(' ', 1)
    return scheme.lower(), credentials
|
728 |
|
729 #### Header generators |
|
def generateAccept(accept):
    """Render a (MimeType, qvalue) pair as one Accept header element."""
    mimeType, q = accept

    out = "%s/%s" % (mimeType.mediaType, mimeType.mediaSubtype)
    if mimeType.params:
        out += ';' + generateKeyValues(mimeType.params.items())

    if q != 1.0:
        # Format to 3 decimals, then trim trailing zeros and a bare dot.
        qText = (';q=%.3f' % (q,)).rstrip('0').rstrip('.')
        out += qText

    return out
|
741 |
|
def removeDefaultEncoding(seq):
    """Drop the implicit identity;q=0.0001 entry that addDefaultEncoding
    injects, yielding everything else unchanged."""
    for pair in seq:
        isImplicitIdentity = pair[0] == 'identity' and pair[1] == .0001
        if not isImplicitIdentity:
            yield pair
|
746 |
|
def generateAcceptQvalue(keyvalue):
    """Render a ('token', qvalue) pair; ';q=' is omitted when q == 1.0."""
    if keyvalue[1] == 1.0:
        return "%s" % keyvalue[0:1]
    # Trim trailing zeros (and a bare trailing dot) from the qvalue.
    return ("%s;q=%.3f" % keyvalue).rstrip('0').rstrip('.')
|
752 |
|
def parseCacheControl(kv):
    """Parse a single Cache-Control directive, coercing its value according
    to the directive's type (int, optional int, or header-name list)."""
    directive, value = parseKeyValue(kv)
    if directive in ('max-age', 'min-fresh', 's-maxage'):
        # Required integer argument; a missing value becomes 0.
        value = 0 if value is None else int(value)
    elif directive == 'max-stale':
        # Optional integer argument
        if value is not None:
            value = int(value)
    elif directive in ('private', 'no-cache'):
        # Optional list argument
        if value is not None:
            value = [field.strip().lower() for field in value.split(',')]
    return directive, value
|
770 |
|
def generateCacheControl(args):
    """Render a (directive, value) pair back into Cache-Control syntax."""
    directive, value = args
    if value is None:
        return str(directive)
    if directive in ('no-cache', 'private'):
        # quoted list of header names, restored to canonical capitalization
        names = [header_case_mapping.get(name) or dashCapitalize(name)
                 for name in value]
        value = quoteString(generateList(names))
    return '%s=%s' % (directive, value)
|
781 |
|
def generateContentRange(tup):
    """Render (type, start, end, len) as a Content-Range value.

    len can be None (emitted as '*'); start and end may both be None
    (emitted as '*', as for a 416 response).
    """
    # Renamed locals: the original shadowed the builtins `type` and `len`.
    rangeType, start, end, length = tup
    if length is None:
        length = '*'
    else:
        length = int(length)
    if start is None and end is None:
        startend = '*'
    else:
        startend = '%d-%d' % (start, end)

    return '%s %s/%s' % (rangeType, startend, length)
|
797 |
|
def generateDateTime(secSinceEpoch):
    """Convert seconds since epoch to an RFC 1123 HTTP datetime string."""
    # take care gmtime doesn't handle time before epoch (crash on windows at
    # least), so clamp at zero.
    structTime = time.gmtime(max(0, secSinceEpoch))
    return "%s, %02d %3s %4d %02d:%02d:%02d GMT" % (
        weekdayname[structTime.tm_wday],
        structTime.tm_mday, monthname[structTime.tm_mon], structTime.tm_year,
        structTime.tm_hour, structTime.tm_min, structTime.tm_sec)
|
807 |
|
def generateExpect(item):
    """Render a parsed Expect entry (name, (value, extra-kv...)) back to
    header syntax."""
    name, values = item
    if values[0] is None:
        out = '%s' % (name,)
    else:
        out = '%s=%s' % (name, values[0])
    if len(values) > 1:
        out += ';' + generateKeyValues(values[1:])
    return out
|
816 |
|
def generateRange(range):
    """Render ('bytes', [(start, end), ...]) as a Range header value;
    None endpoints render as empty (open-ended ranges)."""
    rangeType, ranges = range

    if rangeType != 'bytes':
        raise ValueError("Unknown range unit: "+rangeType+".")

    def renderSpec(startend):
        first = '' if startend[0] is None else startend[0]
        second = '' if startend[1] is None else startend[1]
        return '%s-%s' % (first, second)

    return rangeType + '=' + ','.join(renderSpec(se) for se in ranges)
|
831 |
|
def generateRetryAfter(when):
    """Render an absolute epoch time as Retry-After delta-seconds."""
    # always generate delta seconds format
    delta = int(when - time.time())
    return str(delta)
|
835 |
|
def generateContentType(mimeType):
    """Render a MimeType (with its params) as a Content-Type value."""
    base = "%s/%s" % (mimeType.mediaType, mimeType.mediaSubtype)
    if not mimeType.params:
        return base
    return base + ';' + generateKeyValues(mimeType.params.items())
|
841 |
|
def generateIfRange(dateOrETag):
    """An ETag renders itself; anything else is treated as an HTTP date."""
    if isinstance(dateOrETag, ETag):
        return dateOrETag.generate()
    return generateDateTime(dateOrETag)
|
847 |
|
848 # WWW-Authenticate and Authorization |
|
849 |
|
def generateWWWAuthenticate(headers):
    """Render a list of (scheme, challenge) pairs as WWW-Authenticate
    header strings; challenge is normally a dict of auth parameters."""
    _generated = []
    for seq in headers:
        scheme, challenge = seq[0], seq[1]

        # If we're going to parse out to something other than a dict
        # we need to be able to generate from something other than a dict

        try:
            l = []
            for k, v in dict(challenge).items():
                l.append("%s=%s" % (k, quoteString(v)))

            _generated.append("%s %s" % (scheme, ", ".join(l)))
        except ValueError:
            # dict() raises ValueError when `challenge` isn't a mapping or
            # key/value sequence (e.g. a bare-token challenge), in which
            # case it is emitted verbatim after the scheme.
            _generated.append("%s %s" % (scheme, challenge))

    return _generated
|
868 |
|
def generateAuthorization(seq):
    """Join scheme and credentials into a single Authorization header line."""
    joined = ' '.join(str(part) for part in seq)
    return [joined]
|
871 |
|
872 |
|
873 #### |
|
class ETag(object):
    """An HTTP entity tag (RFC 2616 section 3.11), optionally weak."""

    def __init__(self, tag, weak=False):
        self.tag = str(tag)
        self.weak = weak

    def match(self, other, strongCompare):
        # Sec 13.3.
        # The strong comparison function: in order to be considered equal, both
        # validators MUST be identical in every way, and both MUST NOT be weak.
        #
        # The weak comparison function: in order to be considered equal, both
        # validators MUST be identical in every way, but either or both of
        # them MAY be tagged as "weak" without affecting the result.
        if not isinstance(other, ETag):
            return False
        if self.tag != other.tag:
            return False
        if strongCompare and (self.weak or other.weak):
            return False
        return True

    def __eq__(self, other):
        return (isinstance(other, ETag) and
                other.tag == self.tag and
                other.weak == self.weak)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __repr__(self):
        return "Etag(%r, weak=%r)" % (self.tag, self.weak)

    @staticmethod
    def parse(tokens):
        """Build an ETag from a tokenized header value; the token sequence
        'w' '/' <tag> marks a weak tag."""
        tokens = tuple(tokens)
        if len(tokens) == 1 and not isinstance(tokens[0], Token):
            return ETag(tokens[0])

        if (len(tokens) == 3 and tokens[0] == "w"
                and tokens[1] == Token('/')):
            return ETag(tokens[2], weak=True)

        raise ValueError("Invalid ETag.")

    def generate(self):
        """Render as '"tag"', with a 'W/' prefix for weak tags."""
        if self.weak:
            return 'W/'+quoteString(self.tag)
        return quoteString(self.tag)
|
922 |
|
def parseStarOrETag(tokens):
    """An If-Match/If-None-Match value: the literal '*' or an ETag."""
    tokens = tuple(tokens)
    return '*' if tokens == ('*',) else ETag.parse(tokens)
|
929 |
|
def generateStarOrETag(etag):
    """Render '*' as-is; anything else must be an ETag and renders itself."""
    return etag if etag == '*' else etag.generate()
|
935 |
|
936 #### Cookies. Blech! |
|
class Cookie(object):
    """A single HTTP cookie, carrying both Netscape-style and RFC 2965
    attributes (version 0 vs version 1)."""
    # __slots__ = ['name', 'value', 'path', 'domain', 'ports', 'expires',
    #              'discard', 'secure', 'httponly', 'comment', 'commenturl',
    #              'version']

    def __init__(self, name, value, path=None, domain=None, ports=None,
                 expires=None, discard=False, secure=False, httponly=False,
                 comment=None, commenturl=None, version=0):
        self.name = name
        self.value = value
        self.path = path
        self.domain = domain
        self.ports = ports
        self.expires = expires
        self.discard = discard
        self.secure = secure
        self.httponly = httponly
        self.comment = comment
        self.commenturl = commenturl
        self.version = version

    def __repr__(self):
        s = "Cookie(%r=%r" % (self.name, self.value)
        if self.path is not None: s += ", path=%r" % (self.path,)
        if self.domain is not None: s += ", domain=%r" % (self.domain,)
        if self.ports is not None: s += ", ports=%r" % (self.ports,)
        if self.expires is not None: s += ", expires=%r" % (self.expires,)
        if self.secure: s += ", secure"
        if self.httponly: s += ", HttpOnly"
        if self.comment is not None: s += ", comment=%r" % (self.comment,)
        if self.commenturl is not None: s += ", commenturl=%r" % (self.commenturl,)
        if self.version != 0: s += ", version=%r" % (self.version,)
        s += ")"
        return s

    def __eq__(self, other):
        # Fix: the original comparison omitted name, value, discard and
        # httponly, so differently-named cookies compared equal.
        return (isinstance(other, Cookie) and
                other.name == self.name and
                other.value == self.value and
                other.path == self.path and
                other.domain == self.domain and
                other.ports == self.ports and
                other.expires == self.expires and
                other.discard == self.discard and
                other.secure == self.secure and
                other.httponly == self.httponly and
                other.comment == self.comment and
                other.commenturl == self.commenturl and
                other.version == self.version)

    def __ne__(self, other):
        return not self.__eq__(other)
|
985 |
|
986 |
|
def parseCookie(headers):
    """Bleargh, the cookie spec sucks.
    This surely needs interoperability testing.
    There are two specs that are supported:
    Version 0) http://wp.netscape.com/newsref/std/cookie_spec.html
    Version 1) http://www.faqs.org/rfcs/rfc2965.html

    @param headers: sequence of raw Cookie header value strings.
    @return: list of Cookie objects parsed from the headers.
    """

    cookies = []
    # There can't really be multiple cookie headers according to RFC, because
    # if multiple headers are allowed, they must be joinable with ",".
    # Neither new RFC2965 cookies nor old netscape cookies are.

    header = ';'.join(headers)
    # A V1 (RFC2965) Cookie header leads with the $Version attribute; use
    # that prefix to choose between the two parsers.
    if header[0:8].lower() == "$version":
        # RFC2965 cookie
        h = tokenize([header], foldCase=False)
        r_cookies = split(h, Token(','))
        for r_cookie in r_cookies:
            last_cookie = None
            rr_cookies = split(r_cookie, Token(';'))
            for cookie in rr_cookies:
                # Each piece is either "name=value" or a bare "name".
                nameval = tuple(split(cookie, Token('=')))
                if len(nameval) == 2:
                    (name,), (value,) = nameval
                else:
                    (name,), = nameval
                    value = None

                name = name.lower()
                if name == '$version':
                    continue
                if name[0] == '$':
                    # $-prefixed attributes describe the cookie that preceded
                    # them; attributes arriving before any cookie are ignored.
                    if last_cookie is not None:
                        if name == '$path':
                            last_cookie.path = value
                        elif name == '$domain':
                            last_cookie.domain = value
                        elif name == '$port':
                            if value is None:
                                last_cookie.ports = ()
                            else:
                                last_cookie.ports = tuple([int(s) for s in value.split(',')])
                else:
                    last_cookie = Cookie(name, value, version=1)
                    cookies.append(last_cookie)
    else:
        # Oldstyle cookies don't do quoted strings or anything sensible.
        # All characters are valid for names except ';' and '=', and all
        # characters are valid for values except ';'. Spaces are stripped,
        # however.
        # NOTE(review): a segment without any '=' makes the unpack below
        # raise ValueError, which propagates to the caller — confirm this
        # is the intended behavior for malformed headers.
        r_cookies = header.split(';')
        for r_cookie in r_cookies:
            name, value = r_cookie.split('=', 1)
            name = name.strip(' \t')
            value = value.strip(' \t')

            cookies.append(Cookie(name, value))

    return cookies
|
1047 |
|
# Validity patterns used by generateCookie() when up-converting V0 cookies
# to V1 syntax.  A name is valid when it contains none of the characters in
# http_tokens or http_ctls (defined elsewhere in this module); a value is
# valid when it is either a valid name or a quoted string.
cookie_validname = "[^"+re.escape(http_tokens+http_ctls)+"]*$"
cookie_validname_re = re.compile(cookie_validname)
cookie_validvalue = cookie_validname+'|"([^"]|\\\\")*"$'
cookie_validvalue_re = re.compile(cookie_validvalue)
|
1052 |
|
def generateCookie(cookies):
    """Generate a single Cookie header value from a list of Cookie objects.

    All cookies are emitted using V1 (RFC2965) syntax; see the commentary
    below for the caveats of converting V0 cookies.
    """
    # There's a fundamental problem with the two cookie specifications.
    # They both use the "Cookie" header, and the RFC Cookie header only allows
    # one version to be specified. Thus, when you have a collection of V0 and
    # V1 cookies, you have to either send them all as V0 or send them all as
    # V1.

    # I choose to send them all as V1.

    # You might think converting a V0 cookie to a V1 cookie would be lossless,
    # but you'd be wrong. If you do the conversion, and a V0 parser tries to
    # read the cookie, it will see a modified form of the cookie, in cases
    # where quotes must be added to conform to proper V1 syntax.
    # (as a real example: "Cookie: cartcontents=oid:94680,qty:1,auto:0,esp:y")

    # However, that is what we will do, anyways. It has a high probability of
    # breaking applications that only handle oldstyle cookies, where some other
    # application set a newstyle cookie that is applicable over for site
    # (or host), AND where the oldstyle cookie uses a value which is invalid
    # syntax in a newstyle cookie.

    # Also, the cookie name *cannot* be quoted in V1, so some cookies just
    # cannot be converted at all. (e.g. "Cookie: phpAds_capAd[32]=2"). These
    # are just discarded during conversion.

    # As this is an unsolvable problem, I will pretend I can just say
    # OH WELL, don't do that, or else upgrade your old applications to have
    # newstyle cookie parsers.

    # I will note offhandedly that there are *many* sites which send V0 cookies
    # that are not valid V1 cookie syntax. About 20% for my cookies file.
    # However, they do not generally mix them with V1 cookies, so this isn't
    # an issue, at least right now. I have not tested to see how many of those
    # webapps support RFC2965 V1 cookies. I suspect not many.

    max_version = max([cookie.version for cookie in cookies])

    if max_version == 0:
        # no quoting or anything.
        return ';'.join(["%s=%s" % (cookie.name, cookie.value) for cookie in cookies])
    else:
        str_cookies = ['$Version="1"']
        for cookie in cookies:
            if cookie.version == 0:
                # Version 0 cookie: we make sure the name and value are valid
                # V1 syntax.

                # If they are, we use them as is. This means in *most* cases,
                # the cookie will look literally the same on output as it did
                # on input.
                # If it isn't a valid name, ignore the cookie.
                # If it isn't a valid value, quote it and hope for the best on
                # the other side.

                if cookie_validname_re.match(cookie.name) is None:
                    continue

                value = cookie.value
                if cookie_validvalue_re.match(cookie.value) is None:
                    value = quoteString(value)

                str_cookies.append("%s=%s" % (cookie.name, value))
            else:
                # V1 cookie, nice and easy
                str_cookies.append("%s=%s" % (cookie.name, quoteString(cookie.value)))

            # Path/domain/port attributes are emitted for every cookie that
            # carries them, regardless of its original version.
            if cookie.path:
                str_cookies.append("$Path=%s" % quoteString(cookie.path))
            if cookie.domain:
                str_cookies.append("$Domain=%s" % quoteString(cookie.domain))
            if cookie.ports is not None:
                if len(cookie.ports) == 0:
                    str_cookies.append("$Port")
                else:
                    str_cookies.append("$Port=%s" % quoteString(",".join([str(x) for x in cookie.ports])))
        return ';'.join(str_cookies)
|
1129 |
|
def parseSetCookie(headers):
    """Parse netscape-style Set-Cookie headers (one cookie per header value)
    into a list of Cookie objects.  Unparseable headers are skipped."""
    cookies = []
    for header in headers:
        try:
            pairs = []
            # Attributes are ';'-separated; each is "name=value" or a
            # bare "name".  Surrounding spaces/tabs are stripped.
            for segment in header.split(';'):
                pieces = segment.split('=', 1)
                key = pieces[0].strip(' \t')
                if len(pieces) == 2:
                    pairs.append((key, pieces[1].strip(' \t')))
                else:
                    pairs.append((key, None))
            cookies.append(makeCookieFromList(pairs, True))
        except ValueError:
            # If we can't parse one Set-Cookie, ignore it,
            # but not the rest of Set-Cookies.
            pass
    return cookies
|
1156 |
|
def parseSetCookie2(toks):
    """Parse a tokenized Set-Cookie2 header (RFC 2965) into Cookie objects.
    Cookies that fail to parse are skipped individually."""
    cookies = []
    # The header is a ','-separated list of cookies, each of which is a
    # ';'-separated list of key=value attributes.
    for rawCookie in split(toks, Token(',')):
        attrs = [parseKeyValue(part) for part in split(rawCookie, Token(';'))]
        try:
            cookies.append(makeCookieFromList(attrs, False))
        except ValueError:
            # Again, if we can't handle one cookie -- ignore it.
            pass
    return cookies
|
1167 |
|
def makeCookieFromList(tup, netscapeFormat):
    """Build a Cookie from a list of (name, value) attribute pairs.

    @param tup: list of (name, value) pairs; the first pair is the cookie's
        own name/value, the remaining pairs are its attributes.
    @param netscapeFormat: True when parsing netscape-style Set-Cookie,
        where attribute values may carry literal surrounding quotes that
        must be stripped.
    @raise ValueError: if the leading name/value pair is incomplete or the
        name starts with '$'.
    """
    name, value = tup[0]
    if name is None or value is None:
        raise ValueError("Cookie has missing name or value")
    if name.startswith("$"):
        raise ValueError("Invalid cookie name: %r, starts with '$'." % name)
    cookie = Cookie(name, value)
    # Once Max-Age has been seen, later Expires attributes are ignored.
    hadMaxAge = False

    for name, value in tup[1:]:
        name = name.lower()

        if value is None:
            if name in ("discard", "secure"):
                # Boolean attrs
                value = True
            elif name != "port":
                # Can be either boolean or explicit
                continue

        if name in ("comment", "commenturl", "discard", "domain", "path", "secure"):
            # simple cases
            setattr(cookie, name, value)
        elif name == "expires" and not hadMaxAge:
            if netscapeFormat and value[0] == '"' and value[-1] == '"':
                value = value[1:-1]
            cookie.expires = parseDateTime(value)
        elif name == "max-age":
            hadMaxAge = True
            # Max-Age is a relative lifetime; store it as absolute epoch time.
            cookie.expires = int(value) + time.time()
        elif name == "port":
            if value is None:
                # Bare "Port" attribute: recorded as an empty tuple, distinct
                # from None (no Port attribute at all).
                cookie.ports = ()
            else:
                if netscapeFormat and value[0] == '"' and value[-1] == '"':
                    value = value[1:-1]
                cookie.ports = tuple([int(s) for s in value.split(',')])
        elif name == "version":
            cookie.version = int(value)

    return cookie
|
1209 |
|
1210 |
|
def generateSetCookie(cookies):
    """Generate netscape-style Set-Cookie header strings, one per cookie."""
    def _serialize(cookie):
        # Start with the cookie pair, then append only the attributes
        # that are actually set.
        attrs = ["%s=%s" % (cookie.name, cookie.value)]
        if cookie.expires:
            attrs.append("expires=%s" % generateDateTime(cookie.expires))
        if cookie.path:
            attrs.append("path=%s" % cookie.path)
        if cookie.domain:
            attrs.append("domain=%s" % cookie.domain)
        if cookie.secure:
            attrs.append("secure")
        if cookie.httponly:
            attrs.append("HttpOnly")
        return '; '.join(attrs)

    return [_serialize(cookie) for cookie in cookies]
|
1228 |
|
def generateSetCookie2(cookies):
    """Generate RFC 2965 Set-Cookie2 header strings, one per cookie."""
    def _serialize(cookie):
        attrs = ["%s=%s" % (cookie.name, quoteString(cookie.value))]
        if cookie.comment:
            attrs.append("Comment=%s" % quoteString(cookie.comment))
        if cookie.commenturl:
            attrs.append("CommentURL=%s" % quoteString(cookie.commenturl))
        if cookie.discard:
            attrs.append("Discard")
        if cookie.domain:
            attrs.append("Domain=%s" % quoteString(cookie.domain))
        if cookie.expires:
            # expires is stored as absolute epoch time; Max-Age is relative.
            attrs.append("Max-Age=%s" % (cookie.expires - time.time()))
        if cookie.path:
            attrs.append("Path=%s" % quoteString(cookie.path))
        if cookie.ports is not None:
            if len(cookie.ports) == 0:
                attrs.append("Port")
            else:
                portList = ",".join([str(port) for port in cookie.ports])
                attrs.append("Port=%s" % quoteString(portList))
        if cookie.secure:
            attrs.append("Secure")
        if cookie.httponly:
            attrs.append("HttpOnly")
        attrs.append('Version="1"')
        return '; '.join(attrs)

    return [_serialize(cookie) for cookie in cookies]
|
1257 |
|
def parseDepth(depth):
    """Validate a WebDAV Depth header value; only "0", "1" and "infinity"
    are accepted.  Returns the value unchanged, raises ValueError otherwise."""
    if depth in ("0", "1", "infinity"):
        return depth
    raise ValueError("Invalid depth header value: %s" % (depth,))
|
1262 |
|
def parseOverWrite(overwrite):
    """Parse a WebDAV Overwrite header: "T" -> True, "F" -> False."""
    mapping = {"F": False, "T": True}
    if overwrite in mapping:
        return mapping[overwrite]
    raise ValueError("Invalid overwrite header value: %s" % (overwrite,))
|
1269 |
|
def generateOverWrite(overwrite):
    """Serialize a boolean into a WebDAV Overwrite header value."""
    return "T" if overwrite else "F"
|
1275 |
|
1276 ##### Random stuff that looks useful. |
|
1277 # def sortMimeQuality(s): |
|
1278 # def sorter(item1, item2): |
|
1279 # if item1[0] == '*': |
|
1280 # if item2[0] == '*': |
|
1281 # return 0 |
|
1282 |
|
1283 |
|
1284 # def sortQuality(s): |
|
1285 # def sorter(item1, item2): |
|
1286 # if item1[1] < item2[1]: |
|
1287 # return -1 |
|
1288 # if item1[1] < item2[1]: |
|
1289 # return 1 |
|
1290 # if item1[0] == item2[0]: |
|
1291 # return 0 |
|
1292 |
|
1293 |
|
1294 # def getMimeQuality(mimeType, accepts): |
|
1295 # type, args = parseArgs(mimeType) |
|
1296 # type = type.split(Token('/')) |
|
1297 # if len(type) != 2: |
|
1298 # raise ValueError, "MIME Type "+s+" invalid." |
|
1299 |
|
1300 # for accept in accepts: |
|
1301 # accept, acceptQual = accept |
|
1302 # acceptType = accept[0:1] |
|
1303 # acceptArgs = accept[2] |
|
1304 |
|
1305 # if ((acceptType == type or acceptType == (type[0],'*') or acceptType==('*','*')) and |
|
1306 # (args == acceptArgs or len(acceptArgs) == 0)): |
|
1307 # return acceptQual |
|
1308 |
|
1309 # def getQuality(type, accepts): |
|
1310 # qual = accepts.get(type) |
|
1311 # if qual is not None: |
|
1312 # return qual |
|
1313 |
|
1314 # return accepts.get('*') |
|
1315 |
|
1316 # Headers object |
|
class __RecalcNeeded(object):
    """Private marker type for the _RecalcNeeded sentinel below."""
    def __repr__(self):
        return "<RecalcNeeded>"

# Sentinel stored in Headers' raw/parsed dicts to mark an entry whose other
# representation has changed and must be regenerated on next access.
_RecalcNeeded = __RecalcNeeded()
|
1322 |
|
class Headers(object):
    """This class stores the HTTP headers as both a parsed representation and
    the raw string representation. It converts between the two on demand.

    A stale entry in either dict is marked with the _RecalcNeeded sentinel
    and recomputed from the other representation (via self.handler) the
    next time it is asked for.
    """

    def __init__(self, headers=None, rawHeaders=None, handler=DefaultHTTPHandler):
        """
        @param headers: optional mapping of header name to parsed value;
            each entry is stored via setHeader.
        @param rawHeaders: optional mapping of header name to a list of raw
            string values; each entry is stored via setRawHeaders.
        @param handler: converter between raw and parsed representations.
        """
        self._raw_headers = {}
        self._headers = {}
        self.handler = handler
        if headers is not None:
            for key, value in headers.items():
                self.setHeader(key, value)
        if rawHeaders is not None:
            for key, value in rawHeaders.items():
                self.setRawHeaders(key, value)

    def _setRawHeaders(self, headers):
        """Replace the raw header dict wholesale; all parsed values are dropped."""
        self._raw_headers = headers
        self._headers = {}

    def _toParsed(self, name):
        """Compute, cache and return the parsed form of header C{name} from
        its raw form."""
        r = self._raw_headers.get(name, None)
        h = self.handler.parse(name, r)
        if h is not None:
            self._headers[name] = h
        return h

    def _toRaw(self, name):
        """Compute, cache and return the raw (list of str) form of header
        C{name} from its parsed form."""
        h = self._headers.get(name, None)
        r = self.handler.generate(name, h)
        if r is not None:
            assert isinstance(r, list)
            for v in r:
                assert isinstance(v, str)
            self._raw_headers[name] = r
        return r

    def __contains__(self, name):
        """Does a header with the given name exist?"""
        return name.lower() in self._raw_headers

    hasHeader = __contains__

    def getRawHeaders(self, name, default=None):
        """Returns a list of headers matching the given name as the raw string
        given."""
        name = name.lower()
        raw_header = self._raw_headers.get(name, default)
        if raw_header is not _RecalcNeeded:
            return raw_header

        return self._toRaw(name)

    def getHeader(self, name, default=None):
        """Returns the parsed representation of the given header.
        The exact form of the return value depends on the header in question.

        If no parser for the header exists, raise ValueError.

        If the header doesn't exist, return default (or None if not specified)
        """
        name = name.lower()
        parsed = self._headers.get(name, default)
        if parsed is not _RecalcNeeded:
            return parsed
        return self._toParsed(name)

    def setRawHeaders(self, name, value):
        """Sets the raw representation of the given header.
        Value should be a list of strings, each being one header of the
        given name.
        """
        assert isinstance(value, list)
        for v in value:
            assert isinstance(v, str)
        name = name.lower()
        self._raw_headers[name] = value
        # The parsed form is now stale; it is regenerated lazily on access.
        self._headers[name] = _RecalcNeeded

    def setHeader(self, name, value):
        """Sets the parsed representation of the given header.
        Value should be a list of objects whose exact form depends
        on the header in question.
        """
        name = name.lower()
        # The raw form is now stale; it is regenerated lazily on access.
        self._raw_headers[name] = _RecalcNeeded
        self._headers[name] = value

    def addRawHeader(self, name, value):
        """
        Add a raw value to a header that may or may not already exist.
        If it exists, add it as a separate header to output; do not
        replace anything.
        """
        name = name.lower()
        raw_header = self._raw_headers.get(name)
        if raw_header is None:
            # No header yet
            raw_header = []
            self._raw_headers[name] = raw_header
        elif raw_header is _RecalcNeeded:
            raw_header = self._toRaw(name)

        raw_header.append(value)
        self._headers[name] = _RecalcNeeded

    def addHeader(self, name, value):
        """
        Add a parsed representation to a header that may or may not already
        exist.  If it exists, add it as a separate header to output; do not
        replace anything.
        """
        name = name.lower()
        header = self._headers.get(name)
        if header is None:
            # No header yet
            header = []
            self._headers[name] = header
        elif header is _RecalcNeeded:
            header = self._toParsed(name)
        header.append(value)
        self._raw_headers[name] = _RecalcNeeded

    def removeHeader(self, name):
        """Removes the header named."""
        name = name.lower()
        if name in self._raw_headers:
            del self._raw_headers[name]
            del self._headers[name]

    def __repr__(self):
        return '<Headers: Raw: %s Parsed: %s>'% (self._raw_headers, self._headers)

    def canonicalNameCaps(self, name):
        """Return the name with the canonical capitalization, if known,
        otherwise, Caps-After-Dashes"""
        return header_case_mapping.get(name) or dashCapitalize(name)

    def getAllRawHeaders(self):
        """Return an iterator of key, value pairs of all headers
        contained in this object, as strings. The keys are capitalized
        in canonical capitalization."""
        for k, v in self._raw_headers.items():
            if v is _RecalcNeeded:
                v = self._toRaw(k)
            yield self.canonicalNameCaps(k), v

    def makeImmutable(self):
        """Make this header set immutable. All mutating operations will
        raise an exception."""
        # Bug fix: addHeader/addRawHeader were previously left enabled, so
        # an "immutable" header set could still be mutated through them.
        self.setHeader = self.setRawHeaders = self.removeHeader = \
            self.addHeader = self.addRawHeader = self._mutateRaise

    def _mutateRaise(self, *args):
        raise AttributeError("This header object is immutable as the headers have already been sent.")
|
1476 |
|
1477 |
|
1478 """The following dicts are all mappings of header to list of operations |
|
1479 to perform. The first operation should generally be 'tokenize' if the |
|
1480 header can be parsed according to the normal tokenization rules. If |
|
1481 it cannot, generally the first thing you want to do is take only the |
|
1482 last instance of the header (in case it was sent multiple times, which |
|
1483 is strictly an error, but we're nice.). |
|
1484 """ |
|
1485 |
|
def iteritems(x):
    """Return the (key, value) item view of a mapping.

    Used as the first step of the generator pipelines below that consume a
    dict of parsed values.  Defined as a proper function rather than a
    lambda assignment (PEP 8, E731).
    """
    return x.items()
|
1487 |
|
1488 |
|
# Parsers for general headers (shared by requests and responses).  Each
# value is a pipeline of operations applied left-to-right to the raw
# header value(s); commented-out entries are not yet implemented.
parser_general_headers = {
    'Cache-Control': (tokenize, listParser(parseCacheControl), dict),
    'Connection': (tokenize, filterTokens),
    'Date': (last, parseDateTime),
    # 'Pragma':tokenize
    # 'Trailer':tokenize
    'Transfer-Encoding': (tokenize, filterTokens),
    # 'Upgrade':tokenize
    # 'Via':tokenize, stripComment
    # 'Warning':tokenize
}
|
1500 |
|
# Generators for general headers: pipelines producing the raw string list
# from a parsed value, applied left-to-right.
generator_general_headers = {
    'Cache-Control': (iteritems, listGenerator(generateCacheControl), singleHeader),
    'Connection': (generateList, singleHeader),
    'Date': (generateDateTime, singleHeader),
    # 'Pragma':
    # 'Trailer':
    'Transfer-Encoding': (generateList, singleHeader),
    # 'Upgrade':
    # 'Via':
    # 'Warning':
}
|
1512 |
|
# Parsers for request-specific headers.
parser_request_headers = {
    'Accept': (tokenize, listParser(parseAccept), dict),
    'Accept-Charset': (tokenize, listParser(parseAcceptQvalue), dict, addDefaultCharset),
    'Accept-Encoding': (tokenize, listParser(parseAcceptQvalue), dict, addDefaultEncoding),
    'Accept-Language': (tokenize, listParser(parseAcceptQvalue), dict),
    'Access-Control-Request-Method': (parseHTTPMethod, ),
    'Access-Control-Request-Headers': (filterTokens, ),
    'Authorization': (last, parseAuthorization),
    'Cookie': (parseCookie,),
    'Expect': (tokenize, listParser(parseExpect), dict),
    'Origin': (last,),
    'From': (last,),
    'Host': (last,),
    'If-Match': (tokenize, listParser(parseStarOrETag), list),
    'If-Modified-Since': (last, parseIfModifiedSince),
    'If-None-Match': (tokenize, listParser(parseStarOrETag), list),
    'If-Range': (parseIfRange,),
    'If-Unmodified-Since': (last, parseDateTime),
    'Max-Forwards': (last, int),
    # 'Proxy-Authorization':str, # what is "credentials"
    'Range': (tokenize, parseRange),
    'Referer': (last, str), # TODO: URI object?
    'TE': (tokenize, listParser(parseAcceptQvalue), dict),
    'User-Agent': (last, str),
}
|
1538 |
|
# Generators for request-specific headers.
generator_request_headers = {
    'Accept': (iteritems, listGenerator(generateAccept), singleHeader),
    'Accept-Charset': (iteritems, listGenerator(generateAcceptQvalue), singleHeader),
    'Accept-Encoding': (iteritems, removeDefaultEncoding,
                        listGenerator(generateAcceptQvalue), singleHeader),
    'Accept-Language': (iteritems, listGenerator(generateAcceptQvalue), singleHeader),
    'Access-Control-Request-Method': (unique, str, singleHeader, ),
    'Authorization': (generateAuthorization,), # what is "credentials"
    'Cookie': (generateCookie, singleHeader),
    'Expect': (iteritems, listGenerator(generateExpect), singleHeader),
    'From': (unique, str, singleHeader),
    'Host': (unique, str, singleHeader),
    'If-Match': (listGenerator(generateStarOrETag), singleHeader),
    'If-Modified-Since': (generateDateTime, singleHeader),
    'If-None-Match': (listGenerator(generateStarOrETag), singleHeader),
    'If-Range': (generateIfRange, singleHeader),
    'If-Unmodified-Since': (generateDateTime, singleHeader),
    'Max-Forwards': (unique, str, singleHeader),
    'Origin': (unique, str, singleHeader),
    # 'Proxy-Authorization':str, # what is "credentials"
    'Range': (generateRange, singleHeader),
    'Referer': (unique, str, singleHeader),
    'TE': (iteritems, listGenerator(generateAcceptQvalue), singleHeader),
    'User-Agent': (unique, str, singleHeader),
}
|
1564 |
|
# Parsers for response-specific headers.
parser_response_headers = {
    'Accept-Ranges': (tokenize, filterTokens),
    'Access-Control-Allow-Origin': (last, parseAllowOrigin,),
    'Access-Control-Allow-Credentials': (last, parseAllowCreds,),
    'Access-Control-Allow-Methods': (tokenize, listParser(parseHTTPMethod), list),
    # NOTE(review): this entry uses listGenerator in a *parser* table,
    # unlike every other entry — confirm it is intentional.
    'Access-Control-Allow-Headers': (listGenerator(str), ),
    'Access-Control-Expose-Headers': (filterTokens, ),
    'Age': (last, int),
    'ETag': (tokenize, ETag.parse),
    'Location': (last,), # TODO: URI object?
    # 'Proxy-Authenticate'
    'Retry-After': (last, parseRetryAfter),
    'Server': (last,),
    'Set-Cookie': (parseSetCookie,),
    'Set-Cookie2': (tokenize, parseSetCookie2),
    'Vary': (tokenize, filterTokens),
    # WWW-Authenticate must not be case-folded: scheme parameters such as
    # nonces are case-sensitive.
    'WWW-Authenticate': (lambda h: tokenize(h, foldCase=False),
                         parseWWWAuthenticate,)
}
|
1584 |
|
# Generators for response-specific headers.
generator_response_headers = {
    'Accept-Ranges': (generateList, singleHeader),
    'Access-Control-Allow-Origin': (unique, str, singleHeader),
    'Access-Control-Allow-Credentials': (generateTrueFalse, singleHeader),
    'Access-Control-Allow-Headers': (set, generateList, singleHeader),
    'Access-Control-Allow-Methods': (set, generateList, singleHeader),
    'Access-Control-Expose-Headers': (set, generateList, singleHeader),
    'Age': (unique, str, singleHeader),
    'ETag': (ETag.generate, singleHeader),
    'Location': (unique, str, singleHeader),
    # 'Proxy-Authenticate'
    'Retry-After': (generateRetryAfter, singleHeader),
    'Server': (unique, str, singleHeader),
    # Set-Cookie/Set-Cookie2 emit one header line per cookie, so no
    # singleHeader step here.
    'Set-Cookie': (generateSetCookie,),
    'Set-Cookie2': (generateSetCookie2,),
    'Vary': (set, generateList, singleHeader),
    'WWW-Authenticate': (generateWWWAuthenticate,)
}
|
1603 |
|
# Parsers for entity headers (describing the message body).
parser_entity_headers = {
    # Allow and Content-Type keep their original case: method names and
    # media-type parameters are passed through without folding.
    'Allow': (lambda str:tokenize(str, foldCase=False), filterTokens),
    'Content-Encoding': (tokenize, filterTokens),
    'Content-Language': (tokenize, filterTokens),
    'Content-Length': (last, int),
    'Content-Location': (last,), # TODO: URI object?
    'Content-MD5': (last, parseContentMD5),
    'Content-Range': (last, parseContentRange),
    'Content-Type': (lambda str:tokenize(str, foldCase=False), parseContentType),
    'Expires': (last, parseExpires),
    'Last-Modified': (last, parseDateTime),
}
|
1616 |
|
# Generators for entity headers (describing the message body).
generator_entity_headers = {
    'Allow': (generateList, singleHeader),
    'Content-Encoding': (generateList, singleHeader),
    'Content-Language': (generateList, singleHeader),
    'Content-Length': (unique, str, singleHeader),
    'Content-Location': (unique, str, singleHeader),
    # base64.encodestring was deprecated in Python 3.1 and removed in 3.9;
    # prefer encodebytes, falling back to encodestring on interpreters that
    # still only provide the old name.
    'Content-MD5': (getattr(base64, "encodebytes", getattr(base64, "encodestring", None)),
                    lambda x: x.strip("\n"), singleHeader),
    'Content-Range': (generateContentRange, singleHeader),
    'Content-Type': (generateContentType, singleHeader),
    'Expires': (generateDateTime, singleHeader),
    'Last-Modified': (generateDateTime, singleHeader),
}
|
1629 |
|
# Register all header tables above with the shared DefaultHTTPHandler so
# Headers instances can convert between raw and parsed representations.
DefaultHTTPHandler.updateParsers(parser_general_headers)
DefaultHTTPHandler.updateParsers(parser_request_headers)
DefaultHTTPHandler.updateParsers(parser_response_headers)
DefaultHTTPHandler.updateParsers(parser_entity_headers)

DefaultHTTPHandler.updateGenerators(generator_general_headers)
DefaultHTTPHandler.updateGenerators(generator_request_headers)
DefaultHTTPHandler.updateGenerators(generator_response_headers)
DefaultHTTPHandler.updateGenerators(generator_entity_headers)
|
1639 |
|
1640 |
|
1641 # casemappingify(DefaultHTTPParsers) |
|
1642 # casemappingify(DefaultHTTPGenerators) |
|
1643 |
|
1644 # lowerify(DefaultHTTPParsers) |
|
1645 # lowerify(DefaultHTTPGenerators) |
|