Mercurial > repos > guerler > hhblits
comparison lib/python3.8/site-packages/pip/_internal/network/download.py @ 0:9e54283cc701 draft
"planemo upload commit d12c32a45bcd441307e632fca6d9af7d60289d44"
author | guerler |
---|---|
date | Mon, 27 Jul 2020 03:47:31 -0400 (2020-07-27) |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9e54283cc701 |
---|---|
1 """Download files with progress indicators. | |
2 """ | |
3 import cgi | |
4 import logging | |
5 import mimetypes | |
6 import os | |
7 | |
8 from pip._vendor import requests | |
9 from pip._vendor.requests.models import CONTENT_CHUNK_SIZE | |
10 | |
11 from pip._internal.models.index import PyPI | |
12 from pip._internal.network.cache import is_from_cache | |
13 from pip._internal.network.utils import response_chunks | |
14 from pip._internal.utils.misc import ( | |
15 format_size, | |
16 redact_auth_from_url, | |
17 splitext, | |
18 ) | |
19 from pip._internal.utils.typing import MYPY_CHECK_RUNNING | |
20 from pip._internal.utils.ui import DownloadProgressProvider | |
21 | |
22 if MYPY_CHECK_RUNNING: | |
23 from typing import Iterable, Optional | |
24 | |
25 from pip._vendor.requests.models import Response | |
26 | |
27 from pip._internal.models.link import Link | |
28 from pip._internal.network.session import PipSession | |
29 | |
30 logger = logging.getLogger(__name__) | |
31 | |
32 | |
33 def _get_http_response_size(resp): | |
34 # type: (Response) -> Optional[int] | |
35 try: | |
36 return int(resp.headers['content-length']) | |
37 except (ValueError, KeyError, TypeError): | |
38 return None | |
39 | |
40 | |
def _prepare_download(
    resp,  # type: Response
    link,  # type: Link
    progress_bar  # type: str
):
    # type: (...) -> Iterable[bytes]
    """Log the download and return an iterable over the response body.

    :param resp: The HTTP response whose body is to be consumed.
    :param link: The link being fetched; used only to build the logged URL.
    :param progress_bar: Passed through to ``DownloadProgressProvider``.
    :return: The chunks produced by ``response_chunks``, wrapped by the
        progress provider when a progress display is warranted.
    """
    total_length = _get_http_response_size(resp)

    # Files on the PyPI storage domain are logged with link.show_url;
    # everything else is logged with the URL minus its fragment.
    on_pypi_storage = link.netloc == PyPI.file_storage_domain
    url = link.show_url if on_pypi_storage else link.url_without_fragment

    logged_url = redact_auth_from_url(url)
    if total_length:
        logged_url = '{} ({})'.format(logged_url, format_size(total_length))

    if is_from_cache(resp):
        logger.info("Using cached %s", logged_url)
    else:
        logger.info("Downloading %s", logged_url)

    # Progress is shown only when INFO logging is enabled, the response was
    # not served from the cache, and the body is either of unknown size or
    # larger than 40 * 1000 bytes.
    show_progress = (
        logger.getEffectiveLevel() <= logging.INFO
        and not is_from_cache(resp)
        and (not total_length or total_length > (40 * 1000))
    )

    chunks = response_chunks(resp, CONTENT_CHUNK_SIZE)
    if show_progress:
        provider = DownloadProgressProvider(progress_bar, max=total_length)
        return provider(chunks)
    return chunks
83 | |
84 | |
def sanitize_content_filename(filename):
    # type: (str) -> str
    """
    Strip any directory components from a Content-Disposition "filename"
    value, keeping only the final path component.
    """
    _directory, final_component = os.path.split(filename)
    return final_component
91 | |
92 | |
def parse_content_disposition(content_disposition, default_filename):
    # type: (str, str) -> str
    """
    Parse the "filename" value from a Content-Disposition header, and
    return the default filename if the result is empty.

    The header is parsed with the ``email`` package rather than
    ``cgi.parse_header``: the ``cgi`` module is deprecated (PEP 594) and
    removed in Python 3.13. RFC 2231 extended parameters
    (``filename*=charset'lang'value``) are decoded as well.

    :param content_disposition: Raw Content-Disposition header value.
    :param default_filename: Name to fall back on when the header carries
        no usable filename.
    :return: The sanitized filename from the header, or
        ``default_filename``.
    """
    # Imported at function scope so this function does not depend on any
    # module-level import being present.
    import email.message
    import email.utils

    message = email.message.Message()
    message['content-disposition'] = content_disposition
    filename = message.get_param('filename', header='content-disposition')
    if isinstance(filename, tuple):
        # RFC 2231 values come back as (charset, language, value).
        filename = email.utils.collapse_rfc2231_value(filename)
    if filename:
        # We need to sanitize the filename to prevent directory traversal
        # in case the filename contains ".." path parts.
        filename = sanitize_content_filename(filename)
    return filename or default_filename
106 | |
107 | |
def _get_http_response_filename(resp, link):
    # type: (Response, Link) -> str
    """Choose a filename for the downloaded response.

    Starts from ``link.filename`` and prefers the name given in a
    Content-Disposition header when one is present. If the chosen name
    has no extension, one is appended from the Content-Type header or,
    failing that, from the response URL when it differs from the link URL.
    """
    name = link.filename  # fallback
    disposition = resp.headers.get('content-disposition')
    if disposition:
        # The header may offer a better guess than the link itself.
        name = parse_content_disposition(disposition, name)

    if splitext(name)[1]:
        # Already has an extension; nothing more to infer.
        return name

    mime_ext = mimetypes.guess_extension(
        resp.headers.get('content-type', '')
    )  # type: Optional[str]
    if mime_ext:
        return name + mime_ext

    if link.url != resp.url:
        # The response URL differs from the link URL, so its extension
        # may be more informative.
        url_ext = os.path.splitext(resp.url)[1]
        if url_ext:
            return name + url_ext

    return name
130 | |
131 | |
132 def _http_get_download(session, link): | |
133 # type: (PipSession, Link) -> Response | |
134 target_url = link.url.split('#', 1)[0] | |
135 resp = session.get( | |
136 target_url, | |
137 # We use Accept-Encoding: identity here because requests | |
138 # defaults to accepting compressed responses. This breaks in | |
139 # a variety of ways depending on how the server is configured. | |
140 # - Some servers will notice that the file isn't a compressible | |
141 # file and will leave the file alone and with an empty | |
142 # Content-Encoding | |
143 # - Some servers will notice that the file is already | |
144 # compressed and will leave the file alone and will add a | |
145 # Content-Encoding: gzip header | |
146 # - Some servers won't notice anything at all and will take | |
147 # a file that's already been compressed and compress it again | |
148 # and set the Content-Encoding: gzip header | |
149 # By setting this to request only the identity encoding We're | |
150 # hoping to eliminate the third case. Hopefully there does not | |
151 # exist a server which when given a file will notice it is | |
152 # already compressed and that you're not asking for a | |
153 # compressed file and will then decompress it before sending | |
154 # because if that's the case I don't think it'll ever be | |
155 # possible to make this work. | |
156 headers={"Accept-Encoding": "identity"}, | |
157 stream=True, | |
158 ) | |
159 resp.raise_for_status() | |
160 return resp | |
161 | |
162 | |
class Download(object):
    """Bundle of a response, the filename chosen for it, and its chunks."""

    def __init__(
        self,
        response,  # type: Response
        filename,  # type: str
        chunks,  # type: Iterable[bytes]
    ):
        # type: (...) -> None
        # Plain value holder: the arguments are stored as given.
        self.response, self.filename, self.chunks = (
            response,
            filename,
            chunks,
        )
174 | |
175 | |
class Downloader(object):
    """Callable that fetches a link and returns a ``Download``."""

    def __init__(
        self,
        session,  # type: PipSession
        progress_bar,  # type: str
    ):
        # type: (...) -> None
        self._progress_bar = progress_bar
        self._session = session

    def __call__(self, link):
        # type: (Link) -> Download
        """Fetch ``link`` and package the response as a ``Download``.

        An HTTP error raised while making the request is logged at
        critical level and then re-raised unchanged.
        """
        try:
            response = _http_get_download(self._session, link)
        except requests.HTTPError as exc:
            status = exc.response.status_code
            logger.critical(
                "HTTP error %s while getting %s", status, link
            )
            raise

        filename = _get_http_response_filename(response, link)
        chunks = _prepare_download(response, link, self._progress_bar)
        return Download(response, filename, chunks)
200 ) |