Mercurial > repos > guerler > hhblits
comparison lib/python3.8/site-packages/pip/_internal/network/download.py @ 0:9e54283cc701 draft
"planemo upload commit d12c32a45bcd441307e632fca6d9af7d60289d44"
| author | guerler | 
|---|---|
| date | Mon, 27 Jul 2020 03:47:31 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:9e54283cc701 | 
|---|---|
| 1 """Download files with progress indicators. | |
| 2 """ | |
| 3 import cgi | |
| 4 import logging | |
| 5 import mimetypes | |
| 6 import os | |
| 7 | |
| 8 from pip._vendor import requests | |
| 9 from pip._vendor.requests.models import CONTENT_CHUNK_SIZE | |
| 10 | |
| 11 from pip._internal.models.index import PyPI | |
| 12 from pip._internal.network.cache import is_from_cache | |
| 13 from pip._internal.network.utils import response_chunks | |
| 14 from pip._internal.utils.misc import ( | |
| 15 format_size, | |
| 16 redact_auth_from_url, | |
| 17 splitext, | |
| 18 ) | |
| 19 from pip._internal.utils.typing import MYPY_CHECK_RUNNING | |
| 20 from pip._internal.utils.ui import DownloadProgressProvider | |
| 21 | |
| 22 if MYPY_CHECK_RUNNING: | |
| 23 from typing import Iterable, Optional | |
| 24 | |
| 25 from pip._vendor.requests.models import Response | |
| 26 | |
| 27 from pip._internal.models.link import Link | |
| 28 from pip._internal.network.session import PipSession | |
| 29 | |
| 30 logger = logging.getLogger(__name__) | |
| 31 | |
| 32 | |
def _get_http_response_size(resp):
    # type: (Response) -> Optional[int]
    """Return the value of the response's Content-Length header as an int.

    None is returned when the header is missing or when its value cannot be
    converted to an integer.
    """
    try:
        raw_length = resp.headers['content-length']
        size = int(raw_length)
    except (KeyError, TypeError, ValueError):
        size = None
    return size
| 39 | |
| 40 | |
def _prepare_download(
    resp,  # type: Response
    link,  # type: Link
    progress_bar  # type: str
):
    # type: (...) -> Iterable[bytes]
    """Log the download and return an iterable over the response body.

    One INFO message is emitted ("Using cached ..." or "Downloading ...")
    naming the credential-redacted URL, followed by the formatted size when
    the response declares a non-zero Content-Length.

    The returned chunks are wrapped by ``DownloadProgressProvider`` only
    when all of the following hold:

    * the logger's effective level is INFO or more verbose,
    * the response was not served from the cache,
    * the size is unknown (or zero) or larger than 40 * 1000.

    Otherwise the plain chunk iterable is returned.
    """
    content_length = _get_http_response_size(resp)

    # Links on PyPI's file storage domain use the link's display URL.
    display_url = (
        link.show_url
        if link.netloc == PyPI.file_storage_domain
        else link.url_without_fragment
    )
    description = redact_auth_from_url(display_url)
    if content_length:
        description = '{} ({})'.format(
            description, format_size(content_length)
        )

    log_template = (
        "Using cached %s" if is_from_cache(resp) else "Downloading %s"
    )
    logger.info(log_template, description)

    # Short-circuit evaluation keeps the original order of the checks:
    # verbosity first, then cache status, then size.
    wants_progress = (
        logger.getEffectiveLevel() <= logging.INFO
        and not is_from_cache(resp)
        and (not content_length or content_length > (40 * 1000))
    )

    body_chunks = response_chunks(resp, CONTENT_CHUNK_SIZE)

    if wants_progress:
        return DownloadProgressProvider(
            progress_bar, max=content_length
        )(body_chunks)

    return body_chunks
| 83 | |
| 84 | |
def sanitize_content_filename(filename):
    # type: (str) -> str
    """
    Reduce the "filename" value of a Content-Disposition header to its
    final path component, discarding any directory part.
    """
    _directory, final_component = os.path.split(filename)
    return final_component
| 91 | |
| 92 | |
def parse_content_disposition(content_disposition, default_filename):
    # type: (str, str) -> str
    """
    Extract the "filename" parameter from a Content-Disposition header
    value. default_filename is returned when the parameter is missing or
    empty, or when nothing is left of it after sanitizing.
    """
    _value, header_params = cgi.parse_header(content_disposition)
    candidate = header_params.get('filename')
    if not candidate:
        return default_filename
    # The header is supplied by the server, so strip any directory parts
    # (e.g. "..") to prevent directory traversal.
    return sanitize_content_filename(candidate) or default_filename
| 106 | |
| 107 | |
def _get_http_response_filename(resp, link):
    # type: (Response, Link) -> str
    """Get an ideal filename from the given HTTP response, falling back to
    the link filename if not provided.

    A name given in the Content-Disposition header, when present, takes
    precedence over ``link.filename``. If the resulting name has no
    extension, one is guessed from the Content-Type header; failing that,
    and only when the response URL differs from the link URL, the extension
    of the response URL's path is appended.
    """
    # Imported locally so this function does not depend on a new
    # module-level import.
    try:
        from urllib.parse import urlsplit
    except ImportError:  # Python 2
        from urlparse import urlsplit

    filename = link.filename  # fallback
    # Have a look at the Content-Disposition header for a better guess
    content_disposition = resp.headers.get('content-disposition')
    if content_disposition:
        filename = parse_content_disposition(content_disposition, filename)
    ext = splitext(filename)[1]  # type: Optional[str]
    if not ext:
        ext = mimetypes.guess_extension(
            resp.headers.get('content-type', '')
        )
        if ext:
            filename += ext
    if not ext and link.url != resp.url:
        # Look at the path component only. Splitting the whole URL lets a
        # query string or fragment leak into the extension, and turns a
        # path-less URL such as "https://example.com" into ".com".
        ext = os.path.splitext(urlsplit(resp.url).path)[1]
        if ext:
            filename += ext
    return filename
| 130 | |
| 131 | |
def _http_get_download(session, link):
    # type: (PipSession, Link) -> Response
    """Issue a streaming GET for the link's URL and return the response.

    Everything from the first '#' onwards is dropped from the URL before
    the request is made. ``raise_for_status()`` is called on the response
    before it is returned, so any exception it raises propagates to the
    caller.
    """
    url_sans_fragment, _sep, _fragment = link.url.partition('#')

    # Ask for the identity encoding explicitly, because requests accepts
    # compressed responses by default and servers handle that inconsistently:
    # - Some notice the file isn't compressible, leave it alone and send an
    #   empty Content-Encoding.
    # - Some notice the file is already compressed, leave it alone and add
    #   a Content-Encoding: gzip header.
    # - Some notice nothing at all, compress the already-compressed file
    #   again and set Content-Encoding: gzip.
    # Requesting only the identity encoding is meant to eliminate the third
    # case. Hopefully no server, seeing an already-compressed file and a
    # request for an uncompressed one, decompresses it before sending; if
    # one does, it is probably not possible to make this work at all.
    request_headers = {"Accept-Encoding": "identity"}

    response = session.get(
        url_sans_fragment,
        headers=request_headers,
        stream=True,
    )
    response.raise_for_status()
    return response
| 161 | |
| 162 | |
class Download(object):
    """Plain holder for a response, the filename chosen for it, and the
    iterable that yields its body chunks.
    """

    def __init__(
        self,
        response,  # type: Response
        filename,  # type: str
        chunks,  # type: Iterable[bytes]
    ):
        # type: (...) -> None
        # The arguments are stored as given; nothing is consumed or copied.
        self.chunks = chunks
        self.filename = filename
        self.response = response
| 174 | |
| 175 | |
class Downloader(object):
    """Callable that turns a link into a ``Download``, using the session
    and progress-bar setting given at construction time.
    """

    def __init__(
        self,
        session,  # type: PipSession
        progress_bar,  # type: str
    ):
        # type: (...) -> None
        self._progress_bar = progress_bar
        self._session = session

    def __call__(self, link):
        # type: (Link) -> Download
        """Fetch ``link`` and return a ``Download`` for it.

        An HTTP error raised while fetching is logged at CRITICAL level
        and then re-raised unchanged.
        """
        try:
            response = _http_get_download(self._session, link)
        except requests.HTTPError as exc:
            status_code = exc.response.status_code
            logger.critical(
                "HTTP error %s while getting %s", status_code, link
            )
            raise

        filename = _get_http_response_filename(response, link)
        chunks = _prepare_download(response, link, self._progress_bar)
        return Download(response, filename, chunks)
