planemo/lib/python3.7/site-packages/pip/_internal/download.py @ 1:56ad4e20f292 (draft)
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author: guerler
date:   Fri, 31 Jul 2020 00:32:28 -0400
from __future__ import absolute_import

import cgi
import email.utils
import json
import logging
import mimetypes
import os
import platform
import re
import shutil
import sys

from pip._vendor import requests, urllib3
from pip._vendor.cachecontrol import CacheControlAdapter
from pip._vendor.cachecontrol.caches import FileCache
from pip._vendor.lockfile import LockError
from pip._vendor.requests.adapters import BaseAdapter, HTTPAdapter
from pip._vendor.requests.auth import AuthBase, HTTPBasicAuth
from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response
from pip._vendor.requests.structures import CaseInsensitiveDict
from pip._vendor.requests.utils import get_netrc_auth
# NOTE: XMLRPC Client is not annotated in typeshed as of 2017-07-17, which is
# why we ignore the type on this import
from pip._vendor.six.moves import xmlrpc_client  # type: ignore
from pip._vendor.six.moves.urllib import parse as urllib_parse
from pip._vendor.six.moves.urllib import request as urllib_request

import pip
from pip._internal.exceptions import HashMismatch, InstallationError
from pip._internal.models.index import PyPI
# Import ssl from compat so the initial import occurs in only one place.
from pip._internal.utils.compat import HAS_TLS, ssl
from pip._internal.utils.encoding import auto_decode
from pip._internal.utils.filesystem import check_path_owner
from pip._internal.utils.glibc import libc_ver
from pip._internal.utils.marker_files import write_delete_marker_file
from pip._internal.utils.misc import (
    ARCHIVE_EXTENSIONS, ask, ask_input, ask_password, ask_path_exists,
    backup_dir, consume, display_path, format_size, get_installed_version,
    path_to_url, remove_auth_from_url, rmtree, split_auth_netloc_from_url,
    splitext, unpack_file,
)
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
from pip._internal.utils.ui import DownloadProgressProvider
from pip._internal.vcs import vcs

if MYPY_CHECK_RUNNING:
    from typing import (
        Optional, Tuple, Dict, IO, Text, Union
    )
    from optparse import Values
    from pip._internal.models.link import Link
    from pip._internal.utils.hashes import Hashes
    from pip._internal.vcs.versioncontrol import AuthInfo, VersionControl

    Credentials = Tuple[str, str, str]


__all__ = ['get_file_content',
           'is_url', 'url_to_path', 'path_to_url',
           'is_archive_file', 'unpack_vcs_link',
           'unpack_file_url', 'is_vcs_url', 'is_file_url',
           'unpack_http_url', 'unpack_url',
           'parse_content_disposition', 'sanitize_content_filename']


logger = logging.getLogger(__name__)


try:
    import keyring  # noqa
except ImportError:
    keyring = None
except Exception as exc:
    logger.warning("Keyring is skipped due to an exception: %s",
                   str(exc))
    keyring = None

# These are environment variables present when running under various
# CI systems. For each variable, some CI systems that use the variable
# are indicated. The collection was chosen so that for each of a number
# of popular systems, at least one of the environment variables is used.
# This list is used to provide some indication of and lower bound for
# CI traffic to PyPI. Thus, it is okay if the list is not comprehensive.
# For more background, see: https://github.com/pypa/pip/issues/5499
CI_ENVIRONMENT_VARIABLES = (
    # Azure Pipelines
    'BUILD_BUILDID',
    # Jenkins
    'BUILD_ID',
    # AppVeyor, CircleCI, Codeship, Gitlab CI, Shippable, Travis CI
    'CI',
    # Explicit environment variable.
    'PIP_IS_CI',
)


def looks_like_ci():
    # type: () -> bool
    """
    Return whether it looks like pip is running under CI.
    """
    # We don't use the method of checking for a tty (e.g. using isatty())
    # because some CI systems mimic a tty (e.g. Travis CI). Thus that
    # method doesn't provide definitive information in either direction.
    return any(name in os.environ for name in CI_ENVIRONMENT_VARIABLES)

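# Illustrative example (not part of the original module): looks_like_ci()
# keys purely off the environment, so in a doctest-style session:
#
#   >>> os.environ["CI"] = "true"      # e.g. set by Travis CI or GitLab CI
#   >>> looks_like_ci()
#   True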

def user_agent():
    """
    Return a string representing the user agent.
    """
    data = {
        "installer": {"name": "pip", "version": pip.__version__},
        "python": platform.python_version(),
        "implementation": {
            "name": platform.python_implementation(),
        },
    }

    if data["implementation"]["name"] == 'CPython':
        data["implementation"]["version"] = platform.python_version()
    elif data["implementation"]["name"] == 'PyPy':
        if sys.pypy_version_info.releaselevel == 'final':
            pypy_version_info = sys.pypy_version_info[:3]
        else:
            pypy_version_info = sys.pypy_version_info
        data["implementation"]["version"] = ".".join(
            [str(x) for x in pypy_version_info]
        )
    elif data["implementation"]["name"] == 'Jython':
        # Complete Guess
        data["implementation"]["version"] = platform.python_version()
    elif data["implementation"]["name"] == 'IronPython':
        # Complete Guess
        data["implementation"]["version"] = platform.python_version()

    if sys.platform.startswith("linux"):
        from pip._vendor import distro
        distro_infos = dict(filter(
            lambda x: x[1],
            zip(["name", "version", "id"], distro.linux_distribution()),
        ))
        libc = dict(filter(
            lambda x: x[1],
            zip(["lib", "version"], libc_ver()),
        ))
        if libc:
            distro_infos["libc"] = libc
        if distro_infos:
            data["distro"] = distro_infos

    if sys.platform.startswith("darwin") and platform.mac_ver()[0]:
        data["distro"] = {"name": "macOS", "version": platform.mac_ver()[0]}

    if platform.system():
        data.setdefault("system", {})["name"] = platform.system()

    if platform.release():
        data.setdefault("system", {})["release"] = platform.release()

    if platform.machine():
        data["cpu"] = platform.machine()

    if HAS_TLS:
        data["openssl_version"] = ssl.OPENSSL_VERSION

    setuptools_version = get_installed_version("setuptools")
    if setuptools_version is not None:
        data["setuptools_version"] = setuptools_version

    # Use None rather than False so as not to give the impression that
    # pip knows it is not being run under CI. Rather, it is a null or
    # inconclusive result. Also, we include some value rather than no
    # value to make it easier to know that the check has been run.
    data["ci"] = True if looks_like_ci() else None

    user_data = os.environ.get("PIP_USER_AGENT_USER_DATA")
    if user_data is not None:
        data["user_data"] = user_data

    return "{data[installer][name]}/{data[installer][version]} {json}".format(
        data=data,
        json=json.dumps(data, separators=(",", ":"), sort_keys=True),
    )

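# Illustrative output (the exact fields vary by platform and the values here
# are made up): the returned string is "pip/<version>" followed by a compact,
# sorted JSON blob, e.g.
#
#   pip/19.1.1 {"ci":null,"cpu":"x86_64","implementation":{"name":"CPython",
#   "version":"3.7.4"},"installer":{"name":"pip","version":"19.1.1"},
#   "python":"3.7.4","system":{"name":"Linux","release":"4.15.0"}}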

def _get_keyring_auth(url, username):
    """Return the tuple auth for a given url from keyring."""
    if not url or not keyring:
        return None

    try:
        try:
            get_credential = keyring.get_credential
        except AttributeError:
            pass
        else:
            logger.debug("Getting credentials from keyring for %s", url)
            cred = get_credential(url, username)
            if cred is not None:
                return cred.username, cred.password
            return None

        if username:
            logger.debug("Getting password from keyring for %s", url)
            password = keyring.get_password(url, username)
            if password:
                return username, password

    except Exception as exc:
        logger.warning("Keyring is skipped due to an exception: %s",
                       str(exc))


class MultiDomainBasicAuth(AuthBase):

    def __init__(self, prompting=True, index_urls=None):
        # type: (bool, Optional[Values]) -> None
        self.prompting = prompting
        self.index_urls = index_urls
        self.passwords = {}  # type: Dict[str, AuthInfo]
        # When the user is prompted to enter credentials and keyring is
        # available, we will offer to save them. If the user accepts,
        # this value is set to the credentials they entered. After the
        # request authenticates, the caller should call
        # ``save_credentials`` to save these.
        self._credentials_to_save = None  # type: Optional[Credentials]

    def _get_index_url(self, url):
        """Return the original index URL matching the requested URL.

        Cached or dynamically generated credentials may work against
        the original index URL rather than just the netloc.

        The provided url should have had its username and password
        removed already. If the original index url had credentials then
        they will be included in the return value.

        Returns None if no matching index was found, or if --no-index
        was specified by the user.
        """
        if not url or not self.index_urls:
            return None

        for u in self.index_urls:
            prefix = remove_auth_from_url(u).rstrip("/") + "/"
            if url.startswith(prefix):
                return u

    def _get_new_credentials(self, original_url, allow_netrc=True,
                             allow_keyring=True):
        """Find and return credentials for the specified URL."""
        # Split the credentials and netloc from the url.
        url, netloc, url_user_password = split_auth_netloc_from_url(
            original_url)

        # Start with the credentials embedded in the url
        username, password = url_user_password
        if username is not None and password is not None:
            logger.debug("Found credentials in url for %s", netloc)
            return url_user_password

        # Find a matching index url for this request
        index_url = self._get_index_url(url)
        if index_url:
            # Split the credentials from the url.
            index_info = split_auth_netloc_from_url(index_url)
            if index_info:
                index_url, _, index_url_user_password = index_info
                logger.debug("Found index url %s", index_url)

        # If an index URL was found, try its embedded credentials
        if index_url and index_url_user_password[0] is not None:
            username, password = index_url_user_password
            if username is not None and password is not None:
                logger.debug("Found credentials in index url for %s", netloc)
                return index_url_user_password

        # Get creds from netrc if we still don't have them
        if allow_netrc:
            netrc_auth = get_netrc_auth(original_url)
            if netrc_auth:
                logger.debug("Found credentials in netrc for %s", netloc)
                return netrc_auth

        # If we don't have a password and keyring is available, use it.
        if allow_keyring:
            # The index url is more specific than the netloc, so try it first
            kr_auth = (_get_keyring_auth(index_url, username) or
                       _get_keyring_auth(netloc, username))
            if kr_auth:
                logger.debug("Found credentials in keyring for %s", netloc)
                return kr_auth

        return username, password

    def _get_url_and_credentials(self, original_url):
        """Return the credentials to use for the provided URL.

        If allowed, netrc and keyring may be used to obtain the
        correct credentials.

        Returns (url_without_credentials, username, password). Note
        that even if the original URL contains credentials, this
        function may return a different username and password.
        """
        url, netloc, _ = split_auth_netloc_from_url(original_url)

        # Use any stored credentials that we have for this netloc
        username, password = self.passwords.get(netloc, (None, None))

        if username is None and password is None:
            # No stored credentials. Acquire new credentials without prompting
            # the user. (e.g. from netrc, keyring, or the URL itself)
            username, password = self._get_new_credentials(original_url)

        if username is not None or password is not None:
            # Convert the username and password if they're None, so that
            # this netloc will show up as "cached" in the conditional above.
            # Further, HTTPBasicAuth doesn't accept None, so it makes sense to
            # cache the value that is going to be used.
            username = username or ""
            password = password or ""

            # Store any acquired credentials.
            self.passwords[netloc] = (username, password)

        assert (
            # Credentials were found
            (username is not None and password is not None) or
            # Credentials were not found
            (username is None and password is None)
        ), "Could not load credentials from url: {}".format(original_url)

        return url, username, password

    def __call__(self, req):
        # Get credentials for this request
        url, username, password = self._get_url_and_credentials(req.url)

        # Set the url of the request to the url without any credentials
        req.url = url

        if username is not None and password is not None:
            # Send the basic auth with this request
            req = HTTPBasicAuth(username, password)(req)

        # Attach a hook to handle 401 responses
        req.register_hook("response", self.handle_401)

        return req

    # Factored out to allow for easy patching in tests
    def _prompt_for_password(self, netloc):
        username = ask_input("User for %s: " % netloc)
        if not username:
            # Always return a 3-tuple; handle_401() unpacks three values.
            return None, None, False
        auth = _get_keyring_auth(netloc, username)
        if auth:
            return auth[0], auth[1], False
        password = ask_password("Password: ")
        return username, password, True

    # Factored out to allow for easy patching in tests
    def _should_save_password_to_keyring(self):
        if not keyring:
            return False
        return ask("Save credentials to keyring [y/N]: ", ["y", "n"]) == "y"

    def handle_401(self, resp, **kwargs):
        # We only care about 401 responses, anything else we want to just
        # pass through the actual response
        if resp.status_code != 401:
            return resp

        # We are not able to prompt the user so simply return the response
        if not self.prompting:
            return resp

        parsed = urllib_parse.urlparse(resp.url)

        # Prompt the user for a new username and password
        username, password, save = self._prompt_for_password(parsed.netloc)

        # Store the new username and password to use for future requests
        self._credentials_to_save = None
        if username is not None and password is not None:
            self.passwords[parsed.netloc] = (username, password)

            # Prompt to save the password to keyring
            if save and self._should_save_password_to_keyring():
                self._credentials_to_save = (parsed.netloc, username,
                                             password)

        # Consume content and release the original connection to allow our new
        # request to reuse the same one.
        resp.content
        resp.raw.release_conn()

        # Add our new username and password to the request
        req = HTTPBasicAuth(username or "", password or "")(resp.request)
        req.register_hook("response", self.warn_on_401)

        # On successful request, save the credentials that were used to
        # keyring. (Note that if the user responded "no" above, this member
        # is not set and nothing will be saved.)
        if self._credentials_to_save:
            req.register_hook("response", self.save_credentials)

        # Send our new request
        new_resp = resp.connection.send(req, **kwargs)
        new_resp.history.append(resp)

        return new_resp

    def warn_on_401(self, resp, **kwargs):
        """Response callback to warn about incorrect credentials."""
        if resp.status_code == 401:
            logger.warning('401 Error, Credentials not correct for %s',
                           resp.request.url)

    def save_credentials(self, resp, **kwargs):
        """Response callback to save credentials on success."""
        assert keyring is not None, "should never reach here without keyring"
        if not keyring:
            return

        creds = self._credentials_to_save
        self._credentials_to_save = None
        if creds and resp.status_code < 400:
            try:
                logger.info('Saving credentials to keyring')
                keyring.set_password(*creds)
            except Exception:
                logger.exception('Failed to save credentials')

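# Illustrative sketch (not part of the original module): the resolution order
# implemented above is URL-embedded credentials, then index-URL credentials,
# then netrc, then keyring, with a 401 falling back to an interactive prompt.
# The index URL below is hypothetical.
#
#   session = requests.Session()
#   session.auth = MultiDomainBasicAuth(
#       index_urls=["https://user:secret@pypi.example.org/simple"],
#   )
#   # A request to https://pypi.example.org/simple/foo/ now reuses user:secret.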

class LocalFSAdapter(BaseAdapter):

    def send(self, request, stream=None, timeout=None, verify=None, cert=None,
             proxies=None):
        pathname = url_to_path(request.url)

        resp = Response()
        resp.status_code = 200
        resp.url = request.url

        try:
            stats = os.stat(pathname)
        except OSError as exc:
            resp.status_code = 404
            resp.raw = exc
        else:
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            content_type = mimetypes.guess_type(pathname)[0] or "text/plain"
            resp.headers = CaseInsensitiveDict({
                "Content-Type": content_type,
                "Content-Length": stats.st_size,
                "Last-Modified": modified,
            })

            resp.raw = open(pathname, "rb")
            resp.close = resp.raw.close

        return resp

    def close(self):
        pass

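# Example use (illustrative; the path is hypothetical): once mounted, a
# requests session can answer file:// URLs straight from the filesystem.
#
#   s = requests.Session()
#   s.mount("file://", LocalFSAdapter())
#   r = s.get("file:///etc/hostname")
#   r.status_code   # 200 if the file exists, 404 otherwise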

class SafeFileCache(FileCache):
    """
    A file based cache which is safe to use even when the target directory may
    not be accessible or writable.
    """

    def __init__(self, *args, **kwargs):
        super(SafeFileCache, self).__init__(*args, **kwargs)

        # Check to ensure that the directory containing our cache directory
        # is owned by the user currently executing pip. If it does not exist,
        # we will check the parent directory until we find one that does exist.
        # If it is not owned by the user executing pip then we will disable
        # the cache and log a warning.
        if not check_path_owner(self.directory):
            logger.warning(
                "The directory '%s' or its parent directory is not owned by "
                "the current user and the cache has been disabled. Please "
                "check the permissions and owner of that directory. If "
                "executing pip with sudo, you may want sudo's -H flag.",
                self.directory,
            )

            # Set our directory to None to disable the Cache
            self.directory = None

    def get(self, *args, **kwargs):
        # If we don't have a directory, then the cache should be a no-op.
        if self.directory is None:
            return

        try:
            return super(SafeFileCache, self).get(*args, **kwargs)
        except (LockError, OSError, IOError):
            # We intentionally silence this error: if we can't access the
            # cache, we can just skip caching and process the request as if
            # caching wasn't enabled.
            pass

    def set(self, *args, **kwargs):
        # If we don't have a directory, then the cache should be a no-op.
        if self.directory is None:
            return

        try:
            return super(SafeFileCache, self).set(*args, **kwargs)
        except (LockError, OSError, IOError):
            # We intentionally silence this error: if we can't access the
            # cache, we can just skip caching and process the request as if
            # caching wasn't enabled.
            pass

    def delete(self, *args, **kwargs):
        # If we don't have a directory, then the cache should be a no-op.
        if self.directory is None:
            return

        try:
            return super(SafeFileCache, self).delete(*args, **kwargs)
        except (LockError, OSError, IOError):
            # We intentionally silence this error: if we can't access the
            # cache, we can just skip caching and process the request as if
            # caching wasn't enabled.
            pass


class InsecureHTTPAdapter(HTTPAdapter):

    def cert_verify(self, conn, url, verify, cert):
        conn.cert_reqs = 'CERT_NONE'
        conn.ca_certs = None


class PipSession(requests.Session):

    timeout = None  # type: Optional[int]

    def __init__(self, *args, **kwargs):
        retries = kwargs.pop("retries", 0)
        cache = kwargs.pop("cache", None)
        insecure_hosts = kwargs.pop("insecure_hosts", [])
        index_urls = kwargs.pop("index_urls", None)

        super(PipSession, self).__init__(*args, **kwargs)

        # Attach our User Agent to the request
        self.headers["User-Agent"] = user_agent()

        # Attach our Authentication handler to the session
        self.auth = MultiDomainBasicAuth(index_urls=index_urls)

        # Create our urllib3.Retry instance which will allow us to customize
        # how we handle retries.
        retries = urllib3.Retry(
            # Set the total number of retries that a particular request can
            # have.
            total=retries,

            # A 503 error from PyPI typically means that the Fastly -> Origin
            # connection got interrupted in some way. A 503 error in general
            # is typically considered a transient error so we'll go ahead and
            # retry it.
            # A 500 may indicate transient error in Amazon S3
            # A 520 or 527 - may indicate transient error in CloudFlare
            status_forcelist=[500, 503, 520, 527],

            # Add a small amount of back off between failed requests in
            # order to prevent hammering the service.
            backoff_factor=0.25,
        )

        # We want to _only_ cache responses on securely fetched origins. We do
        # this because we can't validate the response of an insecurely fetched
        # origin, and we don't want someone to be able to poison the cache and
        # require manual eviction from the cache to fix it.
        if cache:
            secure_adapter = CacheControlAdapter(
                cache=SafeFileCache(cache, use_dir_lock=True),
                max_retries=retries,
            )
        else:
            secure_adapter = HTTPAdapter(max_retries=retries)

        # Our Insecure HTTPAdapter disables HTTPS validation. It does not
        # support caching (see above) so we'll use it for all http:// URLs as
        # well as any https:// host that we've marked as ignoring TLS errors
        # for.
        insecure_adapter = InsecureHTTPAdapter(max_retries=retries)
        # Save this for later use in add_insecure_host().
        self._insecure_adapter = insecure_adapter

        self.mount("https://", secure_adapter)
        self.mount("http://", insecure_adapter)

        # Enable file:// urls
        self.mount("file://", LocalFSAdapter())

        # We want to use a non-validating adapter for any requests which are
        # deemed insecure.
        for host in insecure_hosts:
            self.add_insecure_host(host)

    def add_insecure_host(self, host):
        # type: (str) -> None
        self.mount('https://{}/'.format(host), self._insecure_adapter)

    def request(self, method, url, *args, **kwargs):
        # Allow setting a default timeout on a session
        kwargs.setdefault("timeout", self.timeout)

        # Dispatch the actual request
        return super(PipSession, self).request(method, url, *args, **kwargs)

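# Illustrative construction (the values are made up; the keyword arguments
# match the ones popped in PipSession.__init__ above):
#
#   session = PipSession(
#       retries=3,
#       cache="/tmp/pip-http-cache",          # hypothetical cache directory
#       insecure_hosts=["pypi.internal"],     # hypothetical trusted host
#       index_urls=["https://pypi.org/simple"],
#   )
#   session.timeout = 15
#   resp = session.get("https://pypi.org/simple/")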

def get_file_content(url, comes_from=None, session=None):
    # type: (str, Optional[str], Optional[PipSession]) -> Tuple[str, Text]
    """Gets the content of a file; it may be a filename, file: URL, or
    http: URL. Returns (location, content). Content is unicode.

    :param url: File path or url.
    :param comes_from: Origin description of requirements.
    :param session: Instance of pip.download.PipSession.
    """
    if session is None:
        raise TypeError(
            "get_file_content() missing 1 required keyword argument: 'session'"
        )

    match = _scheme_re.search(url)
    if match:
        scheme = match.group(1).lower()
        if (scheme == 'file' and comes_from and
                comes_from.startswith('http')):
            raise InstallationError(
                'Requirements file %s references URL %s, which is local'
                % (comes_from, url))
        if scheme == 'file':
            path = url.split(':', 1)[1]
            path = path.replace('\\', '/')
            match = _url_slash_drive_re.match(path)
            if match:
                path = match.group(1) + ':' + path.split('|', 1)[1]
            path = urllib_parse.unquote(path)
            if path.startswith('/'):
                path = '/' + path.lstrip('/')
            url = path
        else:
            # FIXME: catch some errors
            resp = session.get(url)
            resp.raise_for_status()
            return resp.url, resp.text
    try:
        with open(url, 'rb') as f:
            content = auto_decode(f.read())
    except IOError as exc:
        raise InstallationError(
            'Could not open requirements file: %s' % str(exc)
        )
    return url, content


_scheme_re = re.compile(r'^(http|https|file):', re.I)
_url_slash_drive_re = re.compile(r'/*([a-z])\|', re.I)

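# Example (illustrative): _url_slash_drive_re rewrites legacy Windows drive
# notation inside file: URLs, so for "file:///C|/temp/req.txt" the extracted
# path "///C|/temp/req.txt" becomes "C:/temp/req.txt" before it is opened.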

def is_url(name):
    # type: (Union[str, Text]) -> bool
    """Returns true if the name looks like a URL"""
    if ':' not in name:
        return False
    scheme = name.split(':', 1)[0].lower()
    return scheme in ['http', 'https', 'file', 'ftp'] + vcs.all_schemes


def url_to_path(url):
    # type: (str) -> str
    """
    Convert a file: URL to a path.
    """
    assert url.startswith('file:'), (
        "You can only turn file: urls into filenames (not %r)" % url)

    _, netloc, path, _, _ = urllib_parse.urlsplit(url)

    if not netloc or netloc == 'localhost':
        # According to RFC 8089, same as empty authority.
        netloc = ''
    elif sys.platform == 'win32':
        # If we have a UNC path, prepend UNC share notation.
        netloc = '\\\\' + netloc
    else:
        raise ValueError(
            'non-local file URIs are not supported on this platform: %r'
            % url
        )

    path = urllib_request.url2pathname(netloc + path)
    return path

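# Examples (illustrative):
#
#   url_to_path("file:///tmp/pkg.tar.gz")       # -> "/tmp/pkg.tar.gz" (POSIX)
#   url_to_path("file://server/share/pkg.whl")  # -> r"\\server\share\pkg.whl"
#                                               #    (UNC path, Windows only)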

def is_archive_file(name):
    # type: (str) -> bool
    """Return True if `name` is considered an archive file."""
    ext = splitext(name)[1].lower()
    if ext in ARCHIVE_EXTENSIONS:
        return True
    return False


def unpack_vcs_link(link, location):
    vcs_backend = _get_used_vcs_backend(link)
    vcs_backend.unpack(location, url=link.url)


def _get_used_vcs_backend(link):
    # type: (Link) -> Optional[VersionControl]
    """
    Return a VersionControl object or None.
    """
    for vcs_backend in vcs.backends:
        if link.scheme in vcs_backend.schemes:
            return vcs_backend
    return None


def is_vcs_url(link):
    # type: (Link) -> bool
    return bool(_get_used_vcs_backend(link))


def is_file_url(link):
    # type: (Link) -> bool
    return link.url.lower().startswith('file:')


def is_dir_url(link):
    # type: (Link) -> bool
    """Return whether a file:// Link points to a directory.

    ``link`` must not have any other scheme but file://. Call is_file_url()
    first.

    """
    link_path = url_to_path(link.url_without_fragment)
    return os.path.isdir(link_path)


def _progress_indicator(iterable, *args, **kwargs):
    return iterable


def _download_url(
    resp,  # type: Response
    link,  # type: Link
    content_file,  # type: IO
    hashes,  # type: Optional[Hashes]
    progress_bar  # type: str
):
    # type: (...) -> None
    try:
        total_length = int(resp.headers['content-length'])
    except (ValueError, KeyError, TypeError):
        total_length = 0

    cached_resp = getattr(resp, "from_cache", False)
    if logger.getEffectiveLevel() > logging.INFO:
        show_progress = False
    elif cached_resp:
        show_progress = False
    elif total_length > (40 * 1000):
        show_progress = True
    elif not total_length:
        show_progress = True
    else:
        show_progress = False

    show_url = link.show_url

    def resp_read(chunk_size):
        try:
            # Special case for urllib3.
            for chunk in resp.raw.stream(
                    chunk_size,
                    # We use decode_content=False here because we don't
                    # want urllib3 to mess with the raw bytes we get
                    # from the server. If we decompress inside of
                    # urllib3 then we cannot verify the checksum
                    # because the checksum will be of the compressed
                    # file. This breakage will only occur if the
                    # server adds a Content-Encoding header, which
                    # depends on how the server was configured:
                    # - Some servers will notice that the file isn't a
                    #   compressible file and will leave the file alone
                    #   and with an empty Content-Encoding
                    # - Some servers will notice that the file is
                    #   already compressed and will leave the file
                    #   alone and will add a Content-Encoding: gzip
                    #   header
                    # - Some servers won't notice anything at all and
                    #   will take a file that's already been compressed
                    #   and compress it again and set the
                    #   Content-Encoding: gzip header
                    #
                    # By setting this not to decode automatically we
                    # hope to eliminate problems with the second case.
                    decode_content=False):
                yield chunk
        except AttributeError:
            # Standard file-like object.
            while True:
                chunk = resp.raw.read(chunk_size)
                if not chunk:
                    break
                yield chunk

    def written_chunks(chunks):
        for chunk in chunks:
            content_file.write(chunk)
            yield chunk

    progress_indicator = _progress_indicator

    if link.netloc == PyPI.netloc:
        url = show_url
    else:
        url = link.url_without_fragment

    if show_progress:  # We don't show progress on cached responses
        progress_indicator = DownloadProgressProvider(progress_bar,
                                                      max=total_length)
        if total_length:
            logger.info("Downloading %s (%s)", url, format_size(total_length))
        else:
            logger.info("Downloading %s", url)
    elif cached_resp:
        logger.info("Using cached %s", url)
    else:
        logger.info("Downloading %s", url)

    logger.debug('Downloading from URL %s', link)

    downloaded_chunks = written_chunks(
        progress_indicator(
            resp_read(CONTENT_CHUNK_SIZE),
            CONTENT_CHUNK_SIZE
        )
    )
    if hashes:
        hashes.check_against_chunks(downloaded_chunks)
    else:
        consume(downloaded_chunks)


def _copy_file(filename, location, link):
    copy = True
    download_location = os.path.join(location, link.filename)
    if os.path.exists(download_location):
        response = ask_path_exists(
            'The file %s exists. (i)gnore, (w)ipe, (b)ackup, (a)bort' %
            display_path(download_location), ('i', 'w', 'b', 'a'))
        if response == 'i':
            copy = False
        elif response == 'w':
            logger.warning('Deleting %s', display_path(download_location))
            os.remove(download_location)
        elif response == 'b':
            dest_file = backup_dir(download_location)
            logger.warning(
                'Backing up %s to %s',
                display_path(download_location),
                display_path(dest_file),
            )
            shutil.move(download_location, dest_file)
        elif response == 'a':
            sys.exit(-1)
    if copy:
        shutil.copy(filename, download_location)
        logger.info('Saved %s', display_path(download_location))


def unpack_http_url(
    link,  # type: Link
    location,  # type: str
    download_dir=None,  # type: Optional[str]
    session=None,  # type: Optional[PipSession]
    hashes=None,  # type: Optional[Hashes]
    progress_bar="on"  # type: str
):
    # type: (...) -> None
    if session is None:
        raise TypeError(
            "unpack_http_url() missing 1 required keyword argument: 'session'"
        )

    with TempDirectory(kind="unpack") as temp_dir:
        # If a download dir is specified, is the file already downloaded there?
        already_downloaded_path = None
        if download_dir:
            already_downloaded_path = _check_download_dir(link,
                                                          download_dir,
                                                          hashes)

        if already_downloaded_path:
            from_path = already_downloaded_path
            content_type = mimetypes.guess_type(from_path)[0]
        else:
            # let's download to a tmp dir
            from_path, content_type = _download_http_url(link,
                                                         session,
                                                         temp_dir.path,
                                                         hashes,
                                                         progress_bar)

        # unpack the archive to the build dir location. even when only
        # downloading archives, they have to be unpacked to parse dependencies
        unpack_file(from_path, location, content_type, link)

        # a download dir is specified; let's copy the archive there
        if download_dir and not already_downloaded_path:
            _copy_file(from_path, download_dir, link)

        if not already_downloaded_path:
            os.unlink(from_path)


def unpack_file_url(
    link,  # type: Link
    location,  # type: str
    download_dir=None,  # type: Optional[str]
    hashes=None  # type: Optional[Hashes]
):
    # type: (...) -> None
    """Unpack link into location.

    If download_dir is provided and link points to a file, make a copy
    of the link file inside download_dir.
    """
    link_path = url_to_path(link.url_without_fragment)

    # If it's a url to a local directory
    if is_dir_url(link):
        if os.path.isdir(location):
            rmtree(location)
        shutil.copytree(link_path, location, symlinks=True)
        if download_dir:
            logger.info('Link is a directory, ignoring download_dir')
        return

    # If --require-hashes is off, `hashes` is either empty, the
    # link's embedded hash, or MissingHashes; it is required to
    # match. If --require-hashes is on, we are satisfied by any
    # hash in `hashes` matching: a URL-based or an option-based
    # one; no internet-sourced hash will be in `hashes`.
    if hashes:
        hashes.check_against_path(link_path)

    # If a download dir is specified, is the file already there and valid?
    already_downloaded_path = None
    if download_dir:
        already_downloaded_path = _check_download_dir(link,
                                                      download_dir,
                                                      hashes)

    if already_downloaded_path:
        from_path = already_downloaded_path
    else:
        from_path = link_path

    content_type = mimetypes.guess_type(from_path)[0]

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(from_path, location, content_type, link)

    # a download dir is specified and not already downloaded
    if download_dir and not already_downloaded_path:
        _copy_file(from_path, download_dir, link)


class PipXmlrpcTransport(xmlrpc_client.Transport):
993 """Provide a `xmlrpclib.Transport` implementation via a `PipSession` | |
994 object. | |
995 """ | |

    def __init__(self, index_url, session, use_datetime=False):
        xmlrpc_client.Transport.__init__(self, use_datetime)
        index_parts = urllib_parse.urlparse(index_url)
        self._scheme = index_parts.scheme
        self._session = session

    def request(self, host, handler, request_body, verbose=False):
        parts = (self._scheme, host, handler, None, None, None)
        url = urllib_parse.urlunparse(parts)
        try:
            headers = {'Content-Type': 'text/xml'}
            response = self._session.post(url, data=request_body,
                                          headers=headers, stream=True)
            response.raise_for_status()
            self.verbose = verbose
            return self.parse_response(response.raw)
        except requests.HTTPError as exc:
            logger.critical(
                "HTTP error %s while getting %s",
                exc.response.status_code, url,
            )
            raise


def unpack_url(
    link,  # type: Link
    location,  # type: str
    download_dir=None,  # type: Optional[str]
    only_download=False,  # type: bool
    session=None,  # type: Optional[PipSession]
    hashes=None,  # type: Optional[Hashes]
    progress_bar="on"  # type: str
):
    # type: (...) -> None
    """Unpack link.
    If link is a VCS link:
      if only_download, export into download_dir and ignore location
      else unpack into location
    for other types of link:
      - unpack into location
      - if download_dir, copy the file into download_dir
      - if only_download, mark location for deletion

    :param hashes: A Hashes object, one of whose embedded hashes must match,
        or HashMismatch will be raised. If the Hashes is empty, no matches are
        required, and unhashable types of requirements (like VCS ones, which
        would ordinarily raise HashUnsupported) are allowed.
    """
    # non-editable vcs urls
    if is_vcs_url(link):
        unpack_vcs_link(link, location)

    # file urls
    elif is_file_url(link):
        unpack_file_url(link, location, download_dir, hashes=hashes)

    # http urls
    else:
        if session is None:
            session = PipSession()

        unpack_http_url(
            link,
            location,
            download_dir,
            session,
            hashes=hashes,
            progress_bar=progress_bar
        )
    if only_download:
        write_delete_marker_file(location)


def sanitize_content_filename(filename):
    # type: (str) -> str
    """
    Sanitize the "filename" value from a Content-Disposition header.
    """
    return os.path.basename(filename)


def parse_content_disposition(content_disposition, default_filename):
    # type: (str, str) -> str
    """
    Parse the "filename" value from a Content-Disposition header, and
    return the default filename if the result is empty.
    """
    _type, params = cgi.parse_header(content_disposition)
    filename = params.get('filename')
    if filename:
        # We need to sanitize the filename to prevent directory traversal
        # in case the filename contains ".." path parts.
        filename = sanitize_content_filename(filename)
    return filename or default_filename

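# Example (illustrative): directory traversal attempts in the header are
# neutralized because sanitize_content_filename() keeps only the basename.
#
#   parse_content_disposition(
#       'attachment; filename="../../evil.whl"', "pkg.whl"
#   )
#   # -> "evil.whl"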

def _download_http_url(
    link,  # type: Link
    session,  # type: PipSession
    temp_dir,  # type: str
    hashes,  # type: Optional[Hashes]
    progress_bar  # type: str
):
    # type: (...) -> Tuple[str, str]
    """Download link url into temp_dir using provided session"""
    target_url = link.url.split('#', 1)[0]
    try:
        resp = session.get(
            target_url,
            # We use Accept-Encoding: identity here because requests
            # defaults to accepting compressed responses. This breaks in
            # a variety of ways depending on how the server is configured.
            # - Some servers will notice that the file isn't a compressible
            #   file and will leave the file alone and with an empty
            #   Content-Encoding
            # - Some servers will notice that the file is already
            #   compressed and will leave the file alone and will add a
            #   Content-Encoding: gzip header
            # - Some servers won't notice anything at all and will take
            #   a file that's already been compressed and compress it again
            #   and set the Content-Encoding: gzip header
            # By setting this to request only the identity encoding we're
            # hoping to eliminate the third case. Hopefully there does not
            # exist a server which when given a file will notice it is
            # already compressed and that you're not asking for a
            # compressed file and will then decompress it before sending
            # because if that's the case I don't think it'll ever be
            # possible to make this work.
            headers={"Accept-Encoding": "identity"},
            stream=True,
        )
        resp.raise_for_status()
    except requests.HTTPError as exc:
        logger.critical(
            "HTTP error %s while getting %s", exc.response.status_code, link,
        )
        raise

    content_type = resp.headers.get('content-type', '')
    filename = link.filename  # fallback
    # Have a look at the Content-Disposition header for a better guess
    content_disposition = resp.headers.get('content-disposition')
    if content_disposition:
        filename = parse_content_disposition(content_disposition, filename)
    ext = splitext(filename)[1]  # type: Optional[str]
    if not ext:
        ext = mimetypes.guess_extension(content_type)
        if ext:
            filename += ext
    if not ext and link.url != resp.url:
        ext = os.path.splitext(resp.url)[1]
        if ext:
            filename += ext
    file_path = os.path.join(temp_dir, filename)
    with open(file_path, 'wb') as content_file:
        _download_url(resp, link, content_file, hashes, progress_bar)
    return file_path, content_type


def _check_download_dir(link, download_dir, hashes):
    # type: (Link, str, Optional[Hashes]) -> Optional[str]
1158 """ Check download_dir for previously downloaded file with correct hash | |
1159 If a correct file is found return its path else None | |
1160 """ | |
    download_path = os.path.join(download_dir, link.filename)
    if os.path.exists(download_path):
        # If already downloaded, does its hash match?
        logger.info('File was already downloaded %s', download_path)
        if hashes:
            try:
                hashes.check_against_path(download_path)
            except HashMismatch:
                logger.warning(
                    'Previously-downloaded file %s has bad hash. '
                    'Re-downloading.',
                    download_path
                )
                os.unlink(download_path)
                return None
        return download_path
    return None