comparison lib/python3.8/site-packages/pip/_vendor/urllib3/response.py @ 0:9e54283cc701 draft

"planemo upload commit d12c32a45bcd441307e632fca6d9af7d60289d44"
author guerler
date Mon, 27 Jul 2020 03:47:31 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9e54283cc701
1 from __future__ import absolute_import
2 from contextlib import contextmanager
3 import zlib
4 import io
5 import logging
6 from socket import timeout as SocketTimeout
7 from socket import error as SocketError
8
9 try:
10 import brotli
11 except ImportError:
12 brotli = None
13
14 from ._collections import HTTPHeaderDict
15 from .exceptions import (
16 BodyNotHttplibCompatible,
17 ProtocolError,
18 DecodeError,
19 ReadTimeoutError,
20 ResponseNotChunked,
21 IncompleteRead,
22 InvalidHeader,
23 )
24 from .packages.six import string_types as basestring, PY3
25 from .packages.six.moves import http_client as httplib
26 from .connection import HTTPException, BaseSSLError
27 from .util.response import is_fp_closed, is_response_to_head
28
# Module-level logger; used for protocol warnings such as the conflicting
# Content-Length / Transfer-Encoding case in HTTPResponse._init_length.
log = logging.getLogger(__name__)
30
31
class DeflateDecoder(object):
    """Decoder for ``Content-Encoding: deflate`` bodies.

    Accepts both RFC 1950 (zlib-wrapped) and raw RFC 1951 deflate streams:
    the first chunk is tried with zlib headers, and on failure the decoder
    is rebuilt in raw-deflate mode and the buffered input is replayed.
    """

    def __init__(self):
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def __getattr__(self, name):
        # Delegate everything we don't define (e.g. flush, unused_data)
        # to the underlying zlib decompress object.
        return getattr(self._obj, name)

    def decompress(self, data):
        if not data:
            return data

        if self._first_try:
            # Keep a copy of the input so it can be replayed should the
            # zlib-wrapped interpretation turn out to be wrong.
            self._data += data
            try:
                decompressed = self._obj.decompress(data)
            except zlib.error:
                # Not a zlib stream: retry in raw-deflate mode with
                # everything buffered so far.
                self._first_try = False
                self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
                try:
                    return self.decompress(self._data)
                finally:
                    self._data = None
            if decompressed:
                # The zlib guess produced output, so it was right;
                # stop buffering from now on.
                self._first_try = False
                self._data = None
            return decompressed

        return self._obj.decompress(data)
62
63
class GzipDecoderState(object):
    """States for GzipDecoder: which part of the stream we are in."""

    # Still decoding the first gzip member.
    FIRST_MEMBER = 0
    # At least one member decoded; more members may follow.
    OTHER_MEMBERS = 1
    # A decode error occurred; ignore everything from here on.
    SWALLOW_DATA = 2


class GzipDecoder(object):
    """Decoder for ``Content-Encoding: gzip`` bodies.

    Handles multi-member gzip streams; trailing garbage after a complete
    member is tolerated (as other gzip clients do), while data following
    a decode error is silently swallowed.
    """

    def __init__(self):
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def __getattr__(self, name):
        # Delegate anything we don't define to the zlib decompress object.
        return getattr(self._obj, name)

    def decompress(self, data):
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                past_first_member = self._state == GzipDecoderState.OTHER_MEMBERS
                # Ignore whatever follows the first error.
                self._state = GzipDecoderState.SWALLOW_DATA
                if past_first_member:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Leftover input means another gzip member follows; start a
            # fresh decompressor for it.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
99
100
if brotli is not None:

    class BrotliDecoder(object):
        """Decoder for ``Content-Encoding: br`` bodies.

        Supports both the 'brotlipy' and 'Brotli' packages, which share
        the import name ``brotli``: 'brotlipy' exposes ``process()`` and
        has no ``flush()``, while 'Brotli' exposes ``decompress()`` and
        ``flush()``.
        """

        def __init__(self):
            self._obj = brotli.Decompressor()

        def decompress(self, data):
            decompress = getattr(self._obj, "decompress", None)
            if decompress is not None:
                return decompress(data)  # 'Brotli' package
            return self._obj.process(data)  # 'brotlipy' package

        def flush(self):
            flush = getattr(self._obj, "flush", None)
            if flush is not None:
                return flush()  # 'Brotli' package
            return b""  # 'brotlipy' has no flush; nothing buffered
119
120
class MultiDecoder(object):
    """
    Decoder for a comma-separated chain of content codings.

    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.

    Decoding therefore runs the individual decoders in reverse list order.
    """

    def __init__(self, modes):
        self._decoders = []
        for mode in modes.split(","):
            self._decoders.append(_get_decoder(mode.strip()))

    def flush(self):
        # Only the outermost (first-listed) coding needs an explicit flush.
        return self._decoders[0].flush()

    def decompress(self, data):
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data
140
141
def _get_decoder(mode):
    """Return a fresh decoder object for the Content-Encoding ``mode``.

    A comma in ``mode`` means several codings were chained and is handled
    by :class:`MultiDecoder`.  Anything that is not gzip or (when the
    ``brotli`` package is importable) brotli falls back to the deflate
    decoder, which also tolerates raw deflate streams.
    """
    if "," in mode:
        return MultiDecoder(mode)
    if mode == "gzip":
        return GzipDecoder()
    if mode == "br" and brotli is not None:
        return BrotliDecoder()
    return DeflateDecoder()
153
154
class HTTPResponse(io.IOBase):
    """
    HTTP Response container.

    Backwards-compatible to httplib's HTTPResponse but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in httplib.HTTPResponse:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        The retries contains the last :class:`~urllib3.util.retry.Retry` that
        was used during the request.

    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.

    :param request_url:
        URL of the originating request; returned by :meth:`geturl` when no
        redirect history was recorded on ``retries``.

    :param auto_close:
        When False, :meth:`close` and :attr:`closed` defer to the
        :class:`io.IOBase` machinery instead of tracking the underlying
        file object directly.
    """

    # Content-Encodings that _init_decoder knows how to handle; "br" is only
    # available when the optional brotli package imported successfully.
    CONTENT_DECODERS = ["gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    def __init__(
        self,
        body="",
        headers=None,
        status=0,
        version=0,
        reason=None,
        strict=0,
        preload_content=True,
        decode_content=True,
        original_response=None,
        pool=None,
        connection=None,
        msg=None,
        retries=None,
        enforce_content_length=False,
        request_method=None,
        request_url=None,
        auto_close=True,
    ):

        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)
        self.status = status
        self.version = version
        self.reason = reason
        self.strict = strict
        self.decode_content = decode_content
        self.retries = retries
        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._decoder = None
        self._body = None
        self._fp = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg
        self._request_url = request_url

        # A str/bytes body is cached directly; a file-like body is read
        # lazily through self._fp below.
        if body and isinstance(body, (basestring, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body

        # Are we using the chunked-style of transfer encoding?
        self.chunked = False
        self.chunk_left = None
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

    def get_redirect_location(self):
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")

        return False

    def release_conn(self):
        # Return the connection to its pool, once, and only if both are known.
        if not self._pool or not self._connection:
            return

        self._pool._put_conn(self._connection)
        self._connection = None

    @property
    def data(self):
        # For backwards-compat with urllib3 0.4 and earlier.
        # NOTE(review): a cached empty body (b"") is falsy and falls through
        # to reading self._fp again -- confirm that is intended.
        if self._body:
            return self._body

        if self._fp:
            return self.read(cache_content=True)

    @property
    def connection(self):
        return self._connection

    def isclosed(self):
        return is_fp_closed(self._fp)

    def tell(self):
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``HTTPResponse.read`` if bytes
        are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read

    def _init_length(self, request_method):
        """
        Set initial length value for Response content if available.

        Returns the integer body length promised by the headers, or ``None``
        when it is unknown or intentionally ignored (chunked transfer).
        """
        length = self.headers.get("content-length")

        if length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = set([int(val) for val in length.split(",")])
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length

    def _init_decoder(self):
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Chained codings: only build a MultiDecoder when every
                # listed coding is one we can actually decode.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if len(encodings):
                    self._decoder = _get_decoder(content_encoding)

    # Exceptions a decoder may raise mid-stream; brotli's error type is only
    # added when the package is importable.
    DECODER_ERROR_CLASSES = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    def _decode(self, data, decode_content, flush_decoder):
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            )
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self):
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            buf = self._decoder.decompress(b"")
            return buf + self._decoder.flush()

        return b""

    @contextmanager
    def _error_catcher(self):
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.")

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):  # Defensive:
                    # This shouldn't happen but just in case we're missing an edge
                    # case, let's avoid swallowing SSL errors.
                    raise

                raise ReadTimeoutError(self._pool, None, "Read timed out.")

            except (HTTPException, SocketError) as e:
                # This includes IncompleteRead.
                raise ProtocolError("Connection broken: %r" % e, e)

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def read(self, amt=None, decode_content=None, cache_content=False):
        """
        Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned regardless of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if self._fp is None:
            return

        flush_decoder = False
        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            if amt is None:
                # cStringIO doesn't like amt=None
                data = self._fp.read() if not fp_closed else b""
                flush_decoder = True
            else:
                cache_content = False
                data = self._fp.read(amt) if not fp_closed else b""
                if (
                    amt != 0 and not data
                ):  # Platform-specific: Buggy versions of Python.
                    # Close the connection when no data is returned
                    #
                    # This is redundant to what httplib/http.client _should_
                    # already do. However, versions of python released before
                    # December 15, 2012 (http://bugs.python.org/issue16298) do
                    # not properly close the connection in all cases. There is
                    # no harm in redundantly calling close.
                    self._fp.close()
                    flush_decoder = True
                    if self.enforce_content_length and self.length_remaining not in (
                        0,
                        None,
                    ):
                        # This is an edge case that httplib failed to cover due
                        # to concerns of backward compatibility. We're
                        # addressing it here to make sure IncompleteRead is
                        # raised during streaming, so all calls with incorrect
                        # Content-Length are caught.
                        raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            # Track raw (wire) byte counts before any content decoding.
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)

            data = self._decode(data, decode_content, flush_decoder)

            if cache_content:
                self._body = data

        return data

    def stream(self, amt=2 ** 16, decode_content=None):
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            that much data per iteration, but may return less. This is
            particularly likely when using compressed data. However, the
            empty string will never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            for line in self.read_chunked(amt, decode_content=decode_content):
                yield line
        else:
            while not is_fp_closed(self._fp):
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data

    @classmethod
    def from_httplib(ResponseCls, r, **response_kw):
        """
        Given an :class:`httplib.HTTPResponse` instance ``r``, return a
        corresponding :class:`urllib3.response.HTTPResponse` object.

        Remaining parameters are passed to the HTTPResponse constructor, along
        with ``original_response=r``.
        """
        headers = r.msg

        if not isinstance(headers, HTTPHeaderDict):
            if PY3:
                headers = HTTPHeaderDict(headers.items())
            else:
                # Python 2.7
                headers = HTTPHeaderDict.from_httplib(headers)

        # HTTPResponse objects in Python 3 don't have a .strict attribute
        strict = getattr(r, "strict", 0)
        resp = ResponseCls(
            body=r,
            headers=headers,
            status=r.status,
            version=r.version,
            reason=r.reason,
            strict=strict,
            original_response=r,
            **response_kw
        )
        return resp

    # Backwards-compatibility methods for httplib.HTTPResponse
    def getheaders(self):
        return self.headers

    def getheader(self, name, default=None):
        return self.headers.get(name, default)

    # Backwards compatibility for http.cookiejar
    def info(self):
        return self.headers

    # Overrides from io.IOBase
    def close(self):
        if not self.closed:
            self._fp.close()

        if self._connection:
            self._connection.close()

        if not self.auto_close:
            io.IOBase.close(self)

    @property
    def closed(self):
        if not self.auto_close:
            # Defer to io.IOBase's own closed-state bookkeeping.
            return io.IOBase.closed.__get__(self)
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

    def fileno(self):
        if self._fp is None:
            raise IOError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise IOError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

    def flush(self):
        if (
            self._fp is not None
            and hasattr(self._fp, "flush")
            and not getattr(self._fp, "closed", False)
        ):
            return self._fp.flush()

    def readable(self):
        # This method is required for `io` module compatibility.
        return True

    def readinto(self, b):
        # This method is required for `io` module compatibility.
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    def supports_chunked_reads(self):
        """
        Checks if the underlying file-like object looks like a
        httplib.HTTPResponse object. We do this by testing for the fp
        attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")

    def _update_chunk_length(self):
        # First, we'll figure out length of a chunk and then
        # we'll try to read it from socket.
        if self.chunk_left is not None:
            return
        line = self._fp.fp.readline()
        # Discard any chunk extension after ';' per the chunked framing.
        line = line.split(b";", 1)[0]
        try:
            # Chunk sizes are transmitted in hexadecimal.
            self.chunk_left = int(line, 16)
        except ValueError:
            # Invalid chunked protocol response, abort.
            self.close()
            raise httplib.IncompleteRead(line)

    def _handle_chunk(self, amt):
        # Return up to ``amt`` bytes (or the whole chunk when amt is None)
        # from the current chunk, consuming the trailing CRLF whenever the
        # chunk is fully read.
        returned_chunk = None
        if amt is None:
            chunk = self._fp._safe_read(self.chunk_left)
            returned_chunk = chunk
            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        elif amt < self.chunk_left:
            value = self._fp._safe_read(amt)
            self.chunk_left = self.chunk_left - amt
            returned_chunk = value
        elif amt == self.chunk_left:
            value = self._fp._safe_read(amt)
            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
            returned_chunk = value
        else:  # amt > self.chunk_left
            returned_chunk = self._fp._safe_read(self.chunk_left)
            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        return returned_chunk

    def read_chunked(self, amt=None, decode_content=None):
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be httplib.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:
                return

            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    # A zero-length chunk terminates the body.
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while True:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

    def geturl(self):
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        if self.retries is not None and len(self.retries.history):
            return self.retries.history[-1].redirect_location
        else:
            return self._request_url

    def __iter__(self):
        # Iterate over the decoded body line by line (split on b"\n"),
        # buffering partial lines across stream() chunks.
        buffer = [b""]
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunk = chunk.split(b"\n")
                yield b"".join(buffer) + chunk[0] + b"\n"
                for x in chunk[1:-1]:
                    yield x + b"\n"
                if chunk[-1]:
                    buffer = [chunk[-1]]
                else:
                    buffer = []
            else:
                buffer.append(chunk)
        if buffer:
            yield b"".join(buffer)