comparison env/lib/python3.7/site-packages/urllib3/response.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 from __future__ import absolute_import
2 from contextlib import contextmanager
3 import zlib
4 import io
5 import logging
6 from socket import timeout as SocketTimeout
7 from socket import error as SocketError
8
9 try:
10 import brotli
11 except ImportError:
12 brotli = None
13
14 from ._collections import HTTPHeaderDict
15 from .exceptions import (
16 BodyNotHttplibCompatible,
17 ProtocolError,
18 DecodeError,
19 ReadTimeoutError,
20 ResponseNotChunked,
21 IncompleteRead,
22 InvalidHeader,
23 HTTPError,
24 )
25 from .packages.six import string_types as basestring, PY3
26 from .packages.six.moves import http_client as httplib
27 from .connection import HTTPException, BaseSSLError
28 from .util.response import is_fp_closed, is_response_to_head
29
# Module-level logger used for response-handling diagnostics.
log = logging.getLogger(__name__)
31
32
class DeflateDecoder(object):
    """Decoder for the ``deflate`` content-encoding.

    Some servers send a raw DEFLATE stream instead of the zlib-wrapped
    format that RFC 7230 actually requires.  The first feed of data is
    buffered so that, if zlib-wrapped decoding fails, the same bytes can
    be replayed through a raw-DEFLATE decompressor.
    """

    def __init__(self):
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def __getattr__(self, name):
        # Everything not defined here (flush, unused_data, ...) is
        # delegated to the underlying zlib decompressor.
        return getattr(self._obj, name)

    def decompress(self, data):
        if not data:
            return data

        if not self._first_try:
            # Format already settled; decode directly.
            return self._obj.decompress(data)

        # Still probing the stream format: keep a copy of the raw bytes
        # so they can be replayed if the zlib-wrapped parse fails.
        self._data += data
        try:
            decoded = self._obj.decompress(data)
            if decoded:
                # Producing output means the zlib-wrapped guess was right.
                self._first_try = False
                self._data = None
            return decoded
        except zlib.error:
            # Not zlib-wrapped: restart as a raw DEFLATE stream, feeding
            # it everything received so far.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None
63
64
class GzipDecoderState(object):
    """States used by GzipDecoder for multi-member gzip streams."""

    # Still decoding the first gzip member of the stream.
    FIRST_MEMBER = 0
    # At least one member decoded completely; more may follow.
    OTHER_MEMBERS = 1
    # A decode error occurred; silently discard any remaining input.
    SWALLOW_DATA = 2
70
71
class GzipDecoder(object):
    """Decoder for the ``gzip`` content-encoding.

    Handles streams made of several concatenated gzip members, and
    tolerates trailing garbage after the first complete member (as other
    gzip clients do).
    """

    def __init__(self):
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def __getattr__(self, name):
        # Delegate anything not defined here to the zlib decompressor.
        return getattr(self._obj, name)

    def decompress(self, data):
        output = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
            # Either a previous error told us to discard input, or there
            # is nothing to do.
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                state_before = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if state_before == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Leftover input after a full member: start decoding the
            # next concatenated gzip member with a fresh decompressor.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
100
101
if brotli is not None:

    class BrotliDecoder(object):
        """Decoder for the ``br`` content-encoding.

        Works with both the 'brotlipy' and 'Brotli' packages, which
        share the ``brotli`` import name but expose different APIs:
        'brotlipy' has decompress()/flush(), 'Brotli' has process().
        """

        def __init__(self):
            self._obj = brotli.Decompressor()

        def decompress(self, data):
            decompressor = self._obj
            if hasattr(decompressor, "decompress"):
                # 'brotlipy' code path.
                return decompressor.decompress(data)
            # 'Brotli' code path.
            return decompressor.process(data)

        def flush(self):
            # Only 'brotlipy' exposes an explicit flush; 'Brotli'
            # emits everything eagerly, so nothing is buffered.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()
            return b""
120
121
class MultiDecoder(object):
    """
    Decoder for a comma-separated chain of content-encodings.

    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.

    Decoding therefore walks the list from right (outermost) to left.
    """

    def __init__(self, modes):
        self._decoders = [_get_decoder(mode.strip()) for mode in modes.split(",")]

    def flush(self):
        # The leftmost coding was applied first, so its decoder is the
        # innermost one and the only one worth flushing.
        return self._decoders[0].flush()

    def decompress(self, data):
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data
141
142
def _get_decoder(mode):
    """Return a decoder instance for the given content-encoding value."""
    if "," in mode:
        # Several encodings were applied in sequence; chain the decoders.
        return MultiDecoder(mode)

    if mode == "gzip":
        return GzipDecoder()

    if brotli is not None and mode == "br":
        return BrotliDecoder()

    # Anything else is treated as deflate.
    return DeflateDecoder()
154
155
class HTTPResponse(io.IOBase):
    """
    HTTP Response container.

    Backwards-compatible to httplib's HTTPResponse but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in httplib.HTTPResponse:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        The retries contains the last :class:`~urllib3.util.retry.Retry` that
        was used during the request.

    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.
    """

    # Content-encodings this class knows how to decode; "br" is only
    # included when a brotli implementation could be imported.
    CONTENT_DECODERS = ["gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    # Status codes for which get_redirect_location() may return a URL.
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]
193
    def __init__(
        self,
        body="",
        headers=None,
        status=0,
        version=0,
        reason=None,
        strict=0,
        preload_content=True,
        decode_content=True,
        original_response=None,
        pool=None,
        connection=None,
        msg=None,
        retries=None,
        enforce_content_length=False,
        request_method=None,
        request_url=None,
        auto_close=True,
    ):

        # Normalize headers to an HTTPHeaderDict, avoiding a copy when
        # the caller already passed the right type.
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)
        self.status = status
        self.version = version
        self.reason = reason
        self.strict = strict
        self.decode_content = decode_content
        self.retries = retries
        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._decoder = None  # Created lazily by _init_decoder().
        self._body = None  # Cached (possibly decoded) body bytes.
        self._fp = None  # File-like object the body is read from.
        self._original_response = original_response
        self._fp_bytes_read = 0  # Count of raw bytes pulled off the wire.
        self.msg = msg
        self._request_url = request_url

        # A string/bytes body is stored directly; a file-like body is
        # instead kept (below) as the read source.
        if body and isinstance(body, (basestring, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body

        # Are we using the chunked-style of transfer encoding?
        self.chunked = False
        self.chunk_left = None  # Bytes left in the current chunk, if chunked.
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)
260
261 def get_redirect_location(self):
262 """
263 Should we redirect and where to?
264
265 :returns: Truthy redirect location string if we got a redirect status
266 code and valid location. ``None`` if redirect status and no
267 location. ``False`` if not a redirect status code.
268 """
269 if self.status in self.REDIRECT_STATUSES:
270 return self.headers.get("location")
271
272 return False
273
274 def release_conn(self):
275 if not self._pool or not self._connection:
276 return
277
278 self._pool._put_conn(self._connection)
279 self._connection = None
280
281 def drain_conn(self):
282 """
283 Read and discard any remaining HTTP response data in the response connection.
284
285 Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
286 """
287 try:
288 self.read()
289 except (HTTPError, SocketError, BaseSSLError, HTTPException):
290 pass
291
292 @property
293 def data(self):
294 # For backwords-compat with earlier urllib3 0.4 and earlier.
295 if self._body:
296 return self._body
297
298 if self._fp:
299 return self.read(cache_content=True)
300
301 @property
302 def connection(self):
303 return self._connection
304
305 def isclosed(self):
306 return is_fp_closed(self._fp)
307
308 def tell(self):
309 """
310 Obtain the number of bytes pulled over the wire so far. May differ from
311 the amount of content returned by :meth:``HTTPResponse.read`` if bytes
312 are encoded on the wire (e.g, compressed).
313 """
314 return self._fp_bytes_read
315
    def _init_length(self, request_method):
        """
        Set initial length value for Response content if available.

        :param request_method: Method of the originating request; "HEAD"
            forces the expected length to 0.
        :returns: Expected body length in bytes, or None when unknown.
        """
        length = self.headers.get("content-length")

        if length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = set([int(val) for val in length.split(",")])
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % length
                    )
                length = lengths.pop()
            except ValueError:
                # Non-integer Content-Length: treat the length as unknown.
                length = None
            else:
                if length < 0:
                    # A negative length is nonsensical; ignore it.
                    length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length
367
368 def _init_decoder(self):
369 """
370 Set-up the _decoder attribute if necessary.
371 """
372 # Note: content-encoding value should be case-insensitive, per RFC 7230
373 # Section 3.2
374 content_encoding = self.headers.get("content-encoding", "").lower()
375 if self._decoder is None:
376 if content_encoding in self.CONTENT_DECODERS:
377 self._decoder = _get_decoder(content_encoding)
378 elif "," in content_encoding:
379 encodings = [
380 e.strip()
381 for e in content_encoding.split(",")
382 if e.strip() in self.CONTENT_DECODERS
383 ]
384 if len(encodings):
385 self._decoder = _get_decoder(content_encoding)
386
    # Exception types that indicate a failure while decompressing the body.
    DECODER_ERROR_CLASSES = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)
390
391 def _decode(self, data, decode_content, flush_decoder):
392 """
393 Decode the data passed in and potentially flush the decoder.
394 """
395 if not decode_content:
396 return data
397
398 try:
399 if self._decoder:
400 data = self._decoder.decompress(data)
401 except self.DECODER_ERROR_CLASSES as e:
402 content_encoding = self.headers.get("content-encoding", "").lower()
403 raise DecodeError(
404 "Received response with content-encoding: %s, but "
405 "failed to decode it." % content_encoding,
406 e,
407 )
408 if flush_decoder:
409 data += self._flush_decoder()
410
411 return data
412
413 def _flush_decoder(self):
414 """
415 Flushes the decoder. Should only be called if the decoder is actually
416 being used.
417 """
418 if self._decoder:
419 buf = self._decoder.decompress(b"")
420 return buf + self._decoder.flush()
421
422 return b""
423
    @contextmanager
    def _error_catcher(self):
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.  On an unclean
        exit (any exception), the response and connection are closed so
        the broken connection is not reused.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.")

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):  # Defensive:
                    # This shouldn't happen but just in case we're missing an edge
                    # case, let's avoid swallowing SSL errors.
                    raise

                raise ReadTimeoutError(self._pool, None, "Read timed out.")

            except (HTTPException, SocketError) as e:
                # This includes IncompleteRead.
                raise ProtocolError("Connection broken: %r" % e, e)

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
480
    def read(self, amt=None, decode_content=None, cache_content=False):
        """
        Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)

        :returns: Bytes read (possibly decoded), or None when there is no
            file object to read from.
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if self._fp is None:
            return

        flush_decoder = False
        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            if amt is None:
                # cStringIO doesn't like amt=None
                data = self._fp.read() if not fp_closed else b""
                flush_decoder = True
            else:
                cache_content = False
                data = self._fp.read(amt) if not fp_closed else b""
                if (
                    amt != 0 and not data
                ):  # Platform-specific: Buggy versions of Python.
                    # Close the connection when no data is returned
                    #
                    # This is redundant to what httplib/http.client _should_
                    # already do. However, versions of python released before
                    # December 15, 2012 (http://bugs.python.org/issue16298) do
                    # not properly close the connection in all cases. There is
                    # no harm in redundantly calling close.
                    self._fp.close()
                    flush_decoder = True
                    if self.enforce_content_length and self.length_remaining not in (
                        0,
                        None,
                    ):
                        # This is an edge case that httplib failed to cover due
                        # to concerns of backward compatibility. We're
                        # addressing it here to make sure IncompleteRead is
                        # raised during streaming, so all calls with incorrect
                        # Content-Length are caught.
                        raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            # Track raw bytes consumed and decrement the expected length.
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)

            data = self._decode(data, decode_content, flush_decoder)

            if cache_content:
                self._body = data

        return data
554
555 def stream(self, amt=2 ** 16, decode_content=None):
556 """
557 A generator wrapper for the read() method. A call will block until
558 ``amt`` bytes have been read from the connection or until the
559 connection is closed.
560
561 :param amt:
562 How much of the content to read. The generator will return up to
563 much data per iteration, but may return less. This is particularly
564 likely when using compressed data. However, the empty string will
565 never be returned.
566
567 :param decode_content:
568 If True, will attempt to decode the body based on the
569 'content-encoding' header.
570 """
571 if self.chunked and self.supports_chunked_reads():
572 for line in self.read_chunked(amt, decode_content=decode_content):
573 yield line
574 else:
575 while not is_fp_closed(self._fp):
576 data = self.read(amt=amt, decode_content=decode_content)
577
578 if data:
579 yield data
580
581 @classmethod
582 def from_httplib(ResponseCls, r, **response_kw):
583 """
584 Given an :class:`httplib.HTTPResponse` instance ``r``, return a
585 corresponding :class:`urllib3.response.HTTPResponse` object.
586
587 Remaining parameters are passed to the HTTPResponse constructor, along
588 with ``original_response=r``.
589 """
590 headers = r.msg
591
592 if not isinstance(headers, HTTPHeaderDict):
593 if PY3:
594 headers = HTTPHeaderDict(headers.items())
595 else:
596 # Python 2.7
597 headers = HTTPHeaderDict.from_httplib(headers)
598
599 # HTTPResponse objects in Python 3 don't have a .strict attribute
600 strict = getattr(r, "strict", 0)
601 resp = ResponseCls(
602 body=r,
603 headers=headers,
604 status=r.status,
605 version=r.version,
606 reason=r.reason,
607 strict=strict,
608 original_response=r,
609 **response_kw
610 )
611 return resp
612
613 # Backwards-compatibility methods for httplib.HTTPResponse
614 def getheaders(self):
615 return self.headers
616
617 def getheader(self, name, default=None):
618 return self.headers.get(name, default)
619
620 # Backwards compatibility for http.cookiejar
621 def info(self):
622 return self.headers
623
624 # Overrides from io.IOBase
625 def close(self):
626 if not self.closed:
627 self._fp.close()
628
629 if self._connection:
630 self._connection.close()
631
632 if not self.auto_close:
633 io.IOBase.close(self)
634
635 @property
636 def closed(self):
637 if not self.auto_close:
638 return io.IOBase.closed.__get__(self)
639 elif self._fp is None:
640 return True
641 elif hasattr(self._fp, "isclosed"):
642 return self._fp.isclosed()
643 elif hasattr(self._fp, "closed"):
644 return self._fp.closed
645 else:
646 return True
647
648 def fileno(self):
649 if self._fp is None:
650 raise IOError("HTTPResponse has no file to get a fileno from")
651 elif hasattr(self._fp, "fileno"):
652 return self._fp.fileno()
653 else:
654 raise IOError(
655 "The file-like object this HTTPResponse is wrapped "
656 "around has no file descriptor"
657 )
658
659 def flush(self):
660 if (
661 self._fp is not None
662 and hasattr(self._fp, "flush")
663 and not getattr(self._fp, "closed", False)
664 ):
665 return self._fp.flush()
666
667 def readable(self):
668 # This method is required for `io` module compatibility.
669 return True
670
671 def readinto(self, b):
672 # This method is required for `io` module compatibility.
673 temp = self.read(len(b))
674 if len(temp) == 0:
675 return 0
676 else:
677 b[: len(temp)] = temp
678 return len(temp)
679
680 def supports_chunked_reads(self):
681 """
682 Checks if the underlying file-like object looks like a
683 httplib.HTTPResponse object. We do this by testing for the fp
684 attribute. If it is present we assume it returns raw chunks as
685 processed by read_chunked().
686 """
687 return hasattr(self._fp, "fp")
688
689 def _update_chunk_length(self):
690 # First, we'll figure out length of a chunk and then
691 # we'll try to read it from socket.
692 if self.chunk_left is not None:
693 return
694 line = self._fp.fp.readline()
695 line = line.split(b";", 1)[0]
696 try:
697 self.chunk_left = int(line, 16)
698 except ValueError:
699 # Invalid chunked protocol response, abort.
700 self.close()
701 raise httplib.IncompleteRead(line)
702
703 def _handle_chunk(self, amt):
704 returned_chunk = None
705 if amt is None:
706 chunk = self._fp._safe_read(self.chunk_left)
707 returned_chunk = chunk
708 self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
709 self.chunk_left = None
710 elif amt < self.chunk_left:
711 value = self._fp._safe_read(amt)
712 self.chunk_left = self.chunk_left - amt
713 returned_chunk = value
714 elif amt == self.chunk_left:
715 value = self._fp._safe_read(amt)
716 self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
717 self.chunk_left = None
718 returned_chunk = value
719 else: # amt > self.chunk_left
720 returned_chunk = self._fp._safe_read(self.chunk_left)
721 self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
722 self.chunk_left = None
723 return returned_chunk
724
    def read_chunked(self, amt=None, decode_content=None):
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :raises ResponseNotChunked: if the response lacks
            ``Transfer-Encoding: chunked``.
        :raises BodyNotHttplibCompatible: if the wrapped body does not
            expose raw chunks via an ``fp`` attribute.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be httplib.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:
                return

            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    # A zero-size chunk marks the end of the body.
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while True:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()
794
795 def geturl(self):
796 """
797 Returns the URL that was the source of this response.
798 If the request that generated this response redirected, this method
799 will return the final redirect location.
800 """
801 if self.retries is not None and len(self.retries.history):
802 return self.retries.history[-1].redirect_location
803 else:
804 return self._request_url
805
    def __iter__(self):
        # Iterate over the decoded body line by line; every yielded item
        # except possibly the last ends with b"\n".
        buffer = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunk = chunk.split(b"\n")
                # The first piece completes whatever was buffered so far.
                yield b"".join(buffer) + chunk[0] + b"\n"
                # Middle pieces are complete lines on their own.
                for x in chunk[1:-1]:
                    yield x + b"\n"
                # The piece after the final newline (if any) starts the
                # next buffered line.
                if chunk[-1]:
                    buffer = [chunk[-1]]
                else:
                    buffer = []
            else:
                # No newline yet: keep accumulating.
                buffer.append(chunk)
        # Emit any trailing data that had no final newline.
        if buffer:
            yield b"".join(buffer)