comparison planemo/lib/python3.7/site-packages/urllib3/response.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
parents
children
comparison
equal deleted inserted replaced
0:d30785e31577 1:56ad4e20f292
1 from __future__ import absolute_import
2 from contextlib import contextmanager
3 import zlib
4 import io
5 import logging
6 from socket import timeout as SocketTimeout
7 from socket import error as SocketError
8
9 try:
10 import brotli
11 except ImportError:
12 brotli = None
13
14 from ._collections import HTTPHeaderDict
15 from .exceptions import (
16 BodyNotHttplibCompatible,
17 ProtocolError,
18 DecodeError,
19 ReadTimeoutError,
20 ResponseNotChunked,
21 IncompleteRead,
22 InvalidHeader,
23 HTTPError,
24 )
25 from .packages.six import string_types as basestring, PY3
26 from .packages.six.moves import http_client as httplib
27 from .connection import HTTPException, BaseSSLError
28 from .util.response import is_fp_closed, is_response_to_head
29
30 log = logging.getLogger(__name__)
31
32
class DeflateDecoder(object):
    """Incremental decoder for the "deflate" content-encoding.

    Some servers send a raw DEFLATE stream instead of the zlib-wrapped
    format. The first call optimistically tries the zlib-wrapped format
    and, on failure, falls back to a raw stream, replaying the bytes
    seen so far.
    """

    def __init__(self):
        self._first_try = True  # still probing zlib-wrapped vs. raw deflate
        self._data = b""  # bytes accumulated while probing
        self._obj = zlib.decompressobj()

    def __getattr__(self, name):
        # Delegate everything else (flush, unused_data, ...) to zlib.
        return getattr(self._obj, name)

    def decompress(self, data):
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        # Probe phase: remember the raw input so it can be replayed if the
        # zlib-wrapped attempt turns out to be wrong.
        self._data += data
        try:
            out = self._obj.decompress(data)
            if out:
                self._first_try = False
                self._data = None
            return out
        except zlib.error:
            # Not zlib-wrapped: switch to a raw DEFLATE stream and replay
            # everything received so far.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None
63
64
class GzipDecoderState(object):
    """Progress markers for GzipDecoder within a multi-member stream."""

    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2


class GzipDecoder(object):
    """Incremental decoder for the "gzip" content-encoding.

    Handles concatenated gzip members and, once at least one member has
    been decoded, tolerates trailing garbage the way other gzip clients do.
    """

    def __init__(self):
        # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def __getattr__(self, name):
        # Delegate everything else (flush, unused_data, ...) to zlib.
        return getattr(self._obj, name)

    def decompress(self, data):
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                state_before_error = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if state_before_error == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Another gzip member follows: start a fresh decompressor.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
100
101
if brotli is not None:

    class BrotliDecoder(object):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self):
            self._obj = brotli.Decompressor()
            # Bind whichever incremental-decode entry point this
            # implementation offers; callers only see `decompress`.
            if hasattr(self._obj, "decompress"):
                self.decompress = self._obj.decompress
            else:
                self.decompress = self._obj.process

        def flush(self):
            # 'Brotli' exposes no flush; emit nothing in that case.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()
            return b""
119
120
class MultiDecoder(object):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.

    Decoding therefore proceeds in the reverse of the listed order.
    """

    def __init__(self, modes):
        # e.g. "gzip, br" -> one decoder per listed coding.
        self._decoders = [_get_decoder(mode.strip()) for mode in modes.split(",")]

    def flush(self):
        # The first-listed coding was applied first, so it is decoded last
        # and is the only decoder whose flush output matters.
        return self._decoders[0].flush()

    def decompress(self, data):
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data
140
141
def _get_decoder(mode):
    """Return a decoder instance for a Content-Encoding header value."""
    # A comma means several codings were stacked on this response.
    if "," in mode:
        return MultiDecoder(mode)

    if mode == "gzip":
        return GzipDecoder()

    if brotli is not None and mode == "br":
        return BrotliDecoder()

    # Anything else is treated as deflate (the historical default).
    return DeflateDecoder()
153
154
class HTTPResponse(io.IOBase):
    """
    HTTP Response container.

    Backwards-compatible to httplib's HTTPResponse but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in httplib.HTTPResponse:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an httplib.HTTPResponse
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        The retries contains the last :class:`~urllib3.util.retry.Retry` that
        was used during the request.

    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.
    """

    # Content-Encodings this class can transparently decode; "br" is only
    # offered when a brotli implementation was importable at module load.
    CONTENT_DECODERS = ["gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    # Status codes for which get_redirect_location() reports a redirect.
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]
192
    def __init__(
        self,
        body="",
        headers=None,
        status=0,
        version=0,
        reason=None,
        strict=0,
        preload_content=True,
        decode_content=True,
        original_response=None,
        pool=None,
        connection=None,
        msg=None,
        retries=None,
        enforce_content_length=False,
        request_method=None,
        request_url=None,
        auto_close=True,
    ):
        """Build the response wrapper; see the class docstring for the
        meaning of the extra keyword parameters."""

        # Normalize headers into an HTTPHeaderDict, reusing one that is
        # already the right type.
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)
        self.status = status
        self.version = version
        self.reason = reason
        self.strict = strict
        self.decode_content = decode_content
        self.retries = retries
        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._decoder = None  # created lazily by _init_decoder()
        self._body = None  # cached/preloaded body bytes, if any
        self._fp = None  # file-like body source, if any
        self._original_response = original_response
        self._fp_bytes_read = 0  # raw (wire) bytes consumed so far
        self.msg = msg
        self._request_url = request_url

        # A str/bytes body is stored directly ...
        if body and isinstance(body, (basestring, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        # ... while a file-like body is consumed lazily through self._fp.
        if hasattr(body, "read"):
            self._fp = body

        # Are we using the chunked-style of transfer encoding?
        self.chunked = False
        self.chunk_left = None
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)
259
260 def get_redirect_location(self):
261 """
262 Should we redirect and where to?
263
264 :returns: Truthy redirect location string if we got a redirect status
265 code and valid location. ``None`` if redirect status and no
266 location. ``False`` if not a redirect status code.
267 """
268 if self.status in self.REDIRECT_STATUSES:
269 return self.headers.get("location")
270
271 return False
272
273 def release_conn(self):
274 if not self._pool or not self._connection:
275 return
276
277 self._pool._put_conn(self._connection)
278 self._connection = None
279
    def drain_conn(self):
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, SocketError, BaseSSLError, HTTPException):
            # Best effort: a failed read still counts as "drained" here.
            pass
290
291 @property
292 def data(self):
293 # For backwords-compat with earlier urllib3 0.4 and earlier.
294 if self._body:
295 return self._body
296
297 if self._fp:
298 return self.read(cache_content=True)
299
    @property
    def connection(self):
        # The low-level connection this response reads from (may be None).
        return self._connection
303
    def isclosed(self):
        """httplib-compatible check for whether the body stream is closed."""
        return is_fp_closed(self._fp)
306
    def tell(self):
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``HTTPResponse.read`` if bytes
        are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read
314
315 def _init_length(self, request_method):
316 """
317 Set initial length value for Response content if available.
318 """
319 length = self.headers.get("content-length")
320
321 if length is not None:
322 if self.chunked:
323 # This Response will fail with an IncompleteRead if it can't be
324 # received as chunked. This method falls back to attempt reading
325 # the response before raising an exception.
326 log.warning(
327 "Received response with both Content-Length and "
328 "Transfer-Encoding set. This is expressly forbidden "
329 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
330 "attempting to process response as Transfer-Encoding: "
331 "chunked."
332 )
333 return None
334
335 try:
336 # RFC 7230 section 3.3.2 specifies multiple content lengths can
337 # be sent in a single Content-Length header
338 # (e.g. Content-Length: 42, 42). This line ensures the values
339 # are all valid ints and that as long as the `set` length is 1,
340 # all values are the same. Otherwise, the header is invalid.
341 lengths = set([int(val) for val in length.split(",")])
342 if len(lengths) > 1:
343 raise InvalidHeader(
344 "Content-Length contained multiple "
345 "unmatching values (%s)" % length
346 )
347 length = lengths.pop()
348 except ValueError:
349 length = None
350 else:
351 if length < 0:
352 length = None
353
354 # Convert status to int for comparison
355 # In some cases, httplib returns a status of "_UNKNOWN"
356 try:
357 status = int(self.status)
358 except ValueError:
359 status = 0
360
361 # Check for responses that shouldn't include a body
362 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
363 length = 0
364
365 return length
366
367 def _init_decoder(self):
368 """
369 Set-up the _decoder attribute if necessary.
370 """
371 # Note: content-encoding value should be case-insensitive, per RFC 7230
372 # Section 3.2
373 content_encoding = self.headers.get("content-encoding", "").lower()
374 if self._decoder is None:
375 if content_encoding in self.CONTENT_DECODERS:
376 self._decoder = _get_decoder(content_encoding)
377 elif "," in content_encoding:
378 encodings = [
379 e.strip()
380 for e in content_encoding.split(",")
381 if e.strip() in self.CONTENT_DECODERS
382 ]
383 if len(encodings):
384 self._decoder = _get_decoder(content_encoding)
385
386 DECODER_ERROR_CLASSES = (IOError, zlib.error)
387 if brotli is not None:
388 DECODER_ERROR_CLASSES += (brotli.error,)
389
390 def _decode(self, data, decode_content, flush_decoder):
391 """
392 Decode the data passed in and potentially flush the decoder.
393 """
394 if not decode_content:
395 return data
396
397 try:
398 if self._decoder:
399 data = self._decoder.decompress(data)
400 except self.DECODER_ERROR_CLASSES as e:
401 content_encoding = self.headers.get("content-encoding", "").lower()
402 raise DecodeError(
403 "Received response with content-encoding: %s, but "
404 "failed to decode it." % content_encoding,
405 e,
406 )
407 if flush_decoder:
408 data += self._flush_decoder()
409
410 return data
411
412 def _flush_decoder(self):
413 """
414 Flushes the decoder. Should only be called if the decoder is actually
415 being used.
416 """
417 if self._decoder:
418 buf = self._decoder.decompress(b"")
419 return buf + self._decoder.flush()
420
421 return b""
422
    @contextmanager
    def _error_catcher(self):
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.")

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):  # Defensive:
                    # This shouldn't happen but just in case we're missing an edge
                    # case, let's avoid swallowing SSL errors.
                    raise

                # An SSL read timeout is reported like a socket read timeout.
                raise ReadTimeoutError(self._pool, None, "Read timed out.")

            except (HTTPException, SocketError) as e:
                # This includes IncompleteRead.
                raise ProtocolError("Connection broken: %r" % e, e)

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
479
    def read(self, amt=None, decode_content=None, cache_content=False):
        """
        Similar to :meth:`httplib.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        # No file-like body to read from (e.g. body was given as a string).
        if self._fp is None:
            return

        flush_decoder = False
        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            if amt is None:
                # cStringIO doesn't like amt=None
                data = self._fp.read() if not fp_closed else b""
                flush_decoder = True
            else:
                cache_content = False
                data = self._fp.read(amt) if not fp_closed else b""
                if (
                    amt != 0 and not data
                ):  # Platform-specific: Buggy versions of Python.
                    # Close the connection when no data is returned
                    #
                    # This is redundant to what httplib/http.client _should_
                    # already do. However, versions of python released before
                    # December 15, 2012 (http://bugs.python.org/issue16298) do
                    # not properly close the connection in all cases. There is
                    # no harm in redundantly calling close.
                    self._fp.close()
                    flush_decoder = True
                    if self.enforce_content_length and self.length_remaining not in (
                        0,
                        None,
                    ):
                        # This is an edge case that httplib failed to cover due
                        # to concerns of backward compatibility. We're
                        # addressing it here to make sure IncompleteRead is
                        # raised during streaming, so all calls with incorrect
                        # Content-Length are caught.
                        raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            # Track wire-level progress before any decoding happens.
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)

            data = self._decode(data, decode_content, flush_decoder)

            if cache_content:
                self._body = data

        return data
553
554 def stream(self, amt=2 ** 16, decode_content=None):
555 """
556 A generator wrapper for the read() method. A call will block until
557 ``amt`` bytes have been read from the connection or until the
558 connection is closed.
559
560 :param amt:
561 How much of the content to read. The generator will return up to
562 much data per iteration, but may return less. This is particularly
563 likely when using compressed data. However, the empty string will
564 never be returned.
565
566 :param decode_content:
567 If True, will attempt to decode the body based on the
568 'content-encoding' header.
569 """
570 if self.chunked and self.supports_chunked_reads():
571 for line in self.read_chunked(amt, decode_content=decode_content):
572 yield line
573 else:
574 while not is_fp_closed(self._fp):
575 data = self.read(amt=amt, decode_content=decode_content)
576
577 if data:
578 yield data
579
580 @classmethod
581 def from_httplib(ResponseCls, r, **response_kw):
582 """
583 Given an :class:`httplib.HTTPResponse` instance ``r``, return a
584 corresponding :class:`urllib3.response.HTTPResponse` object.
585
586 Remaining parameters are passed to the HTTPResponse constructor, along
587 with ``original_response=r``.
588 """
589 headers = r.msg
590
591 if not isinstance(headers, HTTPHeaderDict):
592 if PY3:
593 headers = HTTPHeaderDict(headers.items())
594 else:
595 # Python 2.7
596 headers = HTTPHeaderDict.from_httplib(headers)
597
598 # HTTPResponse objects in Python 3 don't have a .strict attribute
599 strict = getattr(r, "strict", 0)
600 resp = ResponseCls(
601 body=r,
602 headers=headers,
603 status=r.status,
604 version=r.version,
605 reason=r.reason,
606 strict=strict,
607 original_response=r,
608 **response_kw
609 )
610 return resp
611
    # Backwards-compatibility methods for httplib.HTTPResponse
    def getheaders(self):
        """Return all response headers (httplib compatibility)."""
        return self.headers

    def getheader(self, name, default=None):
        """Return a single header value, or ``default`` (httplib compatibility)."""
        return self.headers.get(name, default)

    # Backwards compatibility for http.cookiejar
    def info(self):
        """Return the response headers (http.cookiejar compatibility)."""
        return self.headers
622
    # Overrides from io.IOBase
    def close(self):
        """Close the body stream and, if held, the underlying connection."""
        if not self.closed:
            self._fp.close()

        if self._connection:
            self._connection.close()

        if not self.auto_close:
            # With auto_close disabled, the io.IOBase closed flag is the
            # source of truth, so mark it explicitly.
            io.IOBase.close(self)
633
634 @property
635 def closed(self):
636 if not self.auto_close:
637 return io.IOBase.closed.__get__(self)
638 elif self._fp is None:
639 return True
640 elif hasattr(self._fp, "isclosed"):
641 return self._fp.isclosed()
642 elif hasattr(self._fp, "closed"):
643 return self._fp.closed
644 else:
645 return True
646
647 def fileno(self):
648 if self._fp is None:
649 raise IOError("HTTPResponse has no file to get a fileno from")
650 elif hasattr(self._fp, "fileno"):
651 return self._fp.fileno()
652 else:
653 raise IOError(
654 "The file-like object this HTTPResponse is wrapped "
655 "around has no file descriptor"
656 )
657
658 def flush(self):
659 if (
660 self._fp is not None
661 and hasattr(self._fp, "flush")
662 and not getattr(self._fp, "closed", False)
663 ):
664 return self._fp.flush()
665
    def readable(self):
        """Always True; required for `io` module compatibility."""
        # This method is required for `io` module compatibility.
        return True
669
670 def readinto(self, b):
671 # This method is required for `io` module compatibility.
672 temp = self.read(len(b))
673 if len(temp) == 0:
674 return 0
675 else:
676 b[: len(temp)] = temp
677 return len(temp)
678
    def supports_chunked_reads(self):
        """
        Checks if the underlying file-like object looks like a
        httplib.HTTPResponse object. We do this by testing for the fp
        attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")
687
    def _update_chunk_length(self):
        """Parse the next chunk-size line, caching the result in
        ``self.chunk_left``."""
        # First, we'll figure out length of a chunk and then
        # we'll try to read it from socket.
        if self.chunk_left is not None:
            # The current chunk has not been fully consumed yet.
            return
        line = self._fp.fp.readline()
        # Discard any chunk extensions after ';'.
        line = line.split(b";", 1)[0]
        try:
            self.chunk_left = int(line, 16)
        except ValueError:
            # Invalid chunked protocol response, abort.
            self.close()
            raise httplib.IncompleteRead(line)
701
702 def _handle_chunk(self, amt):
703 returned_chunk = None
704 if amt is None:
705 chunk = self._fp._safe_read(self.chunk_left)
706 returned_chunk = chunk
707 self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
708 self.chunk_left = None
709 elif amt < self.chunk_left:
710 value = self._fp._safe_read(amt)
711 self.chunk_left = self.chunk_left - amt
712 returned_chunk = value
713 elif amt == self.chunk_left:
714 value = self._fp._safe_read(amt)
715 self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
716 self.chunk_left = None
717 returned_chunk = value
718 else: # amt > self.chunk_left
719 returned_chunk = self._fp._safe_read(self.chunk_left)
720 self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
721 self.chunk_left = None
722 return returned_chunk
723
    def read_chunked(self, amt=None, decode_content=None):
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be httplib.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:
                return

            # A zero-length chunk marks the end of the body.
            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while True:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()
793
794 def geturl(self):
795 """
796 Returns the URL that was the source of this response.
797 If the request that generated this response redirected, this method
798 will return the final redirect location.
799 """
800 if self.retries is not None and len(self.retries.history):
801 return self.retries.history[-1].redirect_location
802 else:
803 return self._request_url
804
805 def __iter__(self):
806 buffer = []
807 for chunk in self.stream(decode_content=True):
808 if b"\n" in chunk:
809 chunk = chunk.split(b"\n")
810 yield b"".join(buffer) + chunk[0] + b"\n"
811 for x in chunk[1:-1]:
812 yield x + b"\n"
813 if chunk[-1]:
814 buffer = [chunk[-1]]
815 else:
816 buffer = []
817 else:
818 buffer.append(chunk)
819 if buffer:
820 yield b"".join(buffer)