planemo/lib/python3.7/site-packages/pip/_internal/download.py @ 1:56ad4e20f292 draft

"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author guerler
date Fri, 31 Jul 2020 00:32:28 -0400
1 from __future__ import absolute_import
2
3 import cgi
4 import email.utils
5 import json
6 import logging
7 import mimetypes
8 import os
9 import platform
10 import re
11 import shutil
12 import sys
13
14 from pip._vendor import requests, urllib3
15 from pip._vendor.cachecontrol import CacheControlAdapter
16 from pip._vendor.cachecontrol.caches import FileCache
17 from pip._vendor.lockfile import LockError
18 from pip._vendor.requests.adapters import BaseAdapter, HTTPAdapter
19 from pip._vendor.requests.auth import AuthBase, HTTPBasicAuth
20 from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response
21 from pip._vendor.requests.structures import CaseInsensitiveDict
22 from pip._vendor.requests.utils import get_netrc_auth
23 # NOTE: XMLRPC Client is not annotated in typeshed as of 2017-07-17, which is
24 # why we ignore the type on this import
25 from pip._vendor.six.moves import xmlrpc_client # type: ignore
26 from pip._vendor.six.moves.urllib import parse as urllib_parse
27 from pip._vendor.six.moves.urllib import request as urllib_request
28
29 import pip
30 from pip._internal.exceptions import HashMismatch, InstallationError
31 from pip._internal.models.index import PyPI
32 # Import ssl from compat so the initial import occurs in only one place.
33 from pip._internal.utils.compat import HAS_TLS, ssl
34 from pip._internal.utils.encoding import auto_decode
35 from pip._internal.utils.filesystem import check_path_owner
36 from pip._internal.utils.glibc import libc_ver
37 from pip._internal.utils.marker_files import write_delete_marker_file
38 from pip._internal.utils.misc import (
39 ARCHIVE_EXTENSIONS, ask, ask_input, ask_password, ask_path_exists,
40 backup_dir, consume, display_path, format_size, get_installed_version,
41 path_to_url, remove_auth_from_url, rmtree, split_auth_netloc_from_url,
42 splitext, unpack_file,
43 )
44 from pip._internal.utils.temp_dir import TempDirectory
45 from pip._internal.utils.typing import MYPY_CHECK_RUNNING
46 from pip._internal.utils.ui import DownloadProgressProvider
47 from pip._internal.vcs import vcs
48
49 if MYPY_CHECK_RUNNING:
50 from typing import (
51 Optional, Tuple, Dict, IO, List, Text, Union
52 )
53 from optparse import Values
54 from pip._internal.models.link import Link
55 from pip._internal.utils.hashes import Hashes
56 from pip._internal.vcs.versioncontrol import AuthInfo, VersionControl
57
58 Credentials = Tuple[str, str, str]
59
60
61 __all__ = ['get_file_content',
62 'is_url', 'url_to_path', 'path_to_url',
63 'is_archive_file', 'unpack_vcs_link',
64 'unpack_file_url', 'is_vcs_url', 'is_file_url',
65 'unpack_http_url', 'unpack_url',
66 'parse_content_disposition', 'sanitize_content_filename']
67
68
69 logger = logging.getLogger(__name__)
70
71
72 try:
73 import keyring # noqa
74 except ImportError:
75 keyring = None
76 except Exception as exc:
77 logger.warning("Keyring is skipped due to an exception: %s",
78 str(exc))
79 keyring = None
80
81 # These are environment variables present when running under various
82 # CI systems. For each variable, some CI systems that use the variable
83 # are indicated. The collection was chosen so that for each of a number
84 # of popular systems, at least one of the environment variables is used.
85 # This list is used to provide some indication of and lower bound for
86 # CI traffic to PyPI. Thus, it is okay if the list is not comprehensive.
87 # For more background, see: https://github.com/pypa/pip/issues/5499
88 CI_ENVIRONMENT_VARIABLES = (
89 # Azure Pipelines
90 'BUILD_BUILDID',
91 # Jenkins
92 'BUILD_ID',
93 # AppVeyor, CircleCI, Codeship, Gitlab CI, Shippable, Travis CI
94 'CI',
95 # Explicit environment variable.
96 'PIP_IS_CI',
97 )
98
99
100 def looks_like_ci():
101 # type: () -> bool
102 """
103 Return whether it looks like pip is running under CI.
104 """
105 # We don't use the method of checking for a tty (e.g. using isatty())
106 # because some CI systems mimic a tty (e.g. Travis CI). Thus that
107 # method doesn't provide definitive information in either direction.
108 return any(name in os.environ for name in CI_ENVIRONMENT_VARIABLES)
109
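# A hedged usage sketch (illustrative only, not executed by this module):
# with any one of the variables above present, looks_like_ci() reports True.
#
#     import os
#     os.environ.setdefault("CI", "true")  # e.g. as Travis CI or GitLab CI set it
#     print(looks_like_ci())               # -> True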
110
111 def user_agent():
112 """
113 Return a string representing the user agent.
114 """
115 data = {
116 "installer": {"name": "pip", "version": pip.__version__},
117 "python": platform.python_version(),
118 "implementation": {
119 "name": platform.python_implementation(),
120 },
121 }
122
123 if data["implementation"]["name"] == 'CPython':
124 data["implementation"]["version"] = platform.python_version()
125 elif data["implementation"]["name"] == 'PyPy':
126 if sys.pypy_version_info.releaselevel == 'final':
127 pypy_version_info = sys.pypy_version_info[:3]
128 else:
129 pypy_version_info = sys.pypy_version_info
130 data["implementation"]["version"] = ".".join(
131 [str(x) for x in pypy_version_info]
132 )
133 elif data["implementation"]["name"] == 'Jython':
134 # Complete Guess
135 data["implementation"]["version"] = platform.python_version()
136 elif data["implementation"]["name"] == 'IronPython':
137 # Complete Guess
138 data["implementation"]["version"] = platform.python_version()
139
140 if sys.platform.startswith("linux"):
141 from pip._vendor import distro
142 distro_infos = dict(filter(
143 lambda x: x[1],
144 zip(["name", "version", "id"], distro.linux_distribution()),
145 ))
146 libc = dict(filter(
147 lambda x: x[1],
148 zip(["lib", "version"], libc_ver()),
149 ))
150 if libc:
151 distro_infos["libc"] = libc
152 if distro_infos:
153 data["distro"] = distro_infos
154
155 if sys.platform.startswith("darwin") and platform.mac_ver()[0]:
156 data["distro"] = {"name": "macOS", "version": platform.mac_ver()[0]}
157
158 if platform.system():
159 data.setdefault("system", {})["name"] = platform.system()
160
161 if platform.release():
162 data.setdefault("system", {})["release"] = platform.release()
163
164 if platform.machine():
165 data["cpu"] = platform.machine()
166
167 if HAS_TLS:
168 data["openssl_version"] = ssl.OPENSSL_VERSION
169
170 setuptools_version = get_installed_version("setuptools")
171 if setuptools_version is not None:
172 data["setuptools_version"] = setuptools_version
173
174 # Use None rather than False so as not to give the impression that
175 # pip knows it is not being run under CI. Rather, it is a null or
176 # inconclusive result. Also, we include some value rather than no
177 # value to make it easier to know that the check has been run.
178 data["ci"] = True if looks_like_ci() else None
179
180 user_data = os.environ.get("PIP_USER_AGENT_USER_DATA")
181 if user_data is not None:
182 data["user_data"] = user_data
183
184 return "{data[installer][name]}/{data[installer][version]} {json}".format(
185 data=data,
186 json=json.dumps(data, separators=(",", ":"), sort_keys=True),
187 )
188
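# For illustration: the string returned above looks roughly like
#
#     pip/<version> {"ci":null,"cpu":"x86_64","implementation":{...},...}
#
# i.e. "<installer name>/<installer version>" followed by the data dict
# serialized as compact JSON with sorted keys; the exact fields vary by
# platform.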
189
190 def _get_keyring_auth(url, username):
191 """Return the tuple auth for a given url from keyring."""
192 if not url or not keyring:
193 return None
194
195 try:
196 try:
197 get_credential = keyring.get_credential
198 except AttributeError:
199 pass
200 else:
201 logger.debug("Getting credentials from keyring for %s", url)
202 cred = get_credential(url, username)
203 if cred is not None:
204 return cred.username, cred.password
205 return None
206
207 if username:
208 logger.debug("Getting password from keyring for %s", url)
209 password = keyring.get_password(url, username)
210 if password:
211 return username, password
212
213 except Exception as exc:
214 logger.warning("Keyring is skipped due to an exception: %s",
215 str(exc))
216
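# A minimal calling sketch (assumes the optional keyring backend imported
# above is available; the URL is hypothetical):
#
#     creds = _get_keyring_auth("https://index.example.com/simple", "alice")
#     if creds is not None:
#         username, password = creds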
217
218 class MultiDomainBasicAuth(AuthBase):
219
220 def __init__(self, prompting=True, index_urls=None):
221 # type: (bool, Optional[List[str]]) -> None
222 self.prompting = prompting
223 self.index_urls = index_urls
224 self.passwords = {} # type: Dict[str, AuthInfo]
225 # When the user is prompted to enter credentials and keyring is
226 # available, we will offer to save them. If the user accepts,
227 # this value is set to the credentials they entered. After the
228 # request authenticates, the caller should call
229 # ``save_credentials`` to save these.
230 self._credentials_to_save = None # type: Optional[Credentials]
231
232 def _get_index_url(self, url):
233 """Return the original index URL matching the requested URL.
234
235 Cached or dynamically generated credentials may work against
236 the original index URL rather than just the netloc.
237
238 The provided url should have had its username and password
239 removed already. If the original index url had credentials then
240 they will be included in the return value.
241
242 Returns None if no matching index was found, or if --no-index
243 was specified by the user.
244 """
245 if not url or not self.index_urls:
246 return None
247
248 for u in self.index_urls:
249 prefix = remove_auth_from_url(u).rstrip("/") + "/"
250 if url.startswith(prefix):
251 return u
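# Example of the prefix match above (hypothetical URLs): with
#     index_urls = ["https://user:pass@index.example.com/simple/"]
# a request URL of "https://index.example.com/simple/foo/1.0.tar.gz" starts
# with the credential-stripped prefix, so the original index URL, credentials
# included, is returned.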
252
253 def _get_new_credentials(self, original_url, allow_netrc=True,
254 allow_keyring=True):
255 """Find and return credentials for the specified URL."""
256 # Split the credentials and netloc from the url.
257 url, netloc, url_user_password = split_auth_netloc_from_url(
258 original_url)
259
260 # Start with the credentials embedded in the url
261 username, password = url_user_password
262 if username is not None and password is not None:
263 logger.debug("Found credentials in url for %s", netloc)
264 return url_user_password
265
266 # Find a matching index url for this request
267 index_url = self._get_index_url(url)
268 if index_url:
269 # Split the credentials from the url.
270 index_info = split_auth_netloc_from_url(index_url)
271 if index_info:
272 index_url, _, index_url_user_password = index_info
273 logger.debug("Found index url %s", index_url)
274
275 # If an index URL was found, try its embedded credentials
276 if index_url and index_url_user_password[0] is not None:
277 username, password = index_url_user_password
278 if username is not None and password is not None:
279 logger.debug("Found credentials in index url for %s", netloc)
280 return index_url_user_password
281
282 # Get creds from netrc if we still don't have them
283 if allow_netrc:
284 netrc_auth = get_netrc_auth(original_url)
285 if netrc_auth:
286 logger.debug("Found credentials in netrc for %s", netloc)
287 return netrc_auth
288
289 # If we don't have a password and keyring is available, use it.
290 if allow_keyring:
291 # The index url is more specific than the netloc, so try it first
292 kr_auth = (_get_keyring_auth(index_url, username) or
293 _get_keyring_auth(netloc, username))
294 if kr_auth:
295 logger.debug("Found credentials in keyring for %s", netloc)
296 return kr_auth
297
298 return username, password
299
300 def _get_url_and_credentials(self, original_url):
301 """Return the credentials to use for the provided URL.
302
303 If allowed, netrc and keyring may be used to obtain the
304 correct credentials.
305
306 Returns (url_without_credentials, username, password). Note
307 that even if the original URL contains credentials, this
308 function may return a different username and password.
309 """
310 url, netloc, _ = split_auth_netloc_from_url(original_url)
311
312 # Use any stored credentials that we have for this netloc
313 username, password = self.passwords.get(netloc, (None, None))
314
315 if username is None and password is None:
316 # No stored credentials. Acquire new credentials without prompting
317 # the user. (e.g. from netrc, keyring, or the URL itself)
318 username, password = self._get_new_credentials(original_url)
319
320 if username is not None or password is not None:
321 # Convert the username and password if they're None, so that
322 # this netloc will show up as "cached" in the conditional above.
323 # Further, HTTPBasicAuth doesn't accept None, so it makes sense to
324 # cache the value that is going to be used.
325 username = username or ""
326 password = password or ""
327
328 # Store any acquired credentials.
329 self.passwords[netloc] = (username, password)
330
331 assert (
332 # Credentials were found
333 (username is not None and password is not None) or
334 # Credentials were not found
335 (username is None and password is None)
336 ), "Could not load credentials from url: {}".format(original_url)
337
338 return url, username, password
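# Resolution order implemented above, in summary: (1) credentials already
# cached for this netloc; then, via _get_new_credentials: (2) credentials
# embedded in the URL, (3) credentials embedded in a matching index URL,
# (4) netrc, (5) keyring. A sketch with a hypothetical URL:
#
#     auth = MultiDomainBasicAuth(index_urls=["https://index.example.com/simple/"])
#     url, user, pw = auth._get_url_and_credentials(
#         "https://alice:s3cret@index.example.com/simple/foo/")
#     # url comes back with credentials stripped; user == "alice", pw == "s3cret"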
339
340 def __call__(self, req):
341 # Get credentials for this request
342 url, username, password = self._get_url_and_credentials(req.url)
343
344 # Set the url of the request to the url without any credentials
345 req.url = url
346
347 if username is not None and password is not None:
348 # Send the basic auth with this request
349 req = HTTPBasicAuth(username, password)(req)
350
351 # Attach a hook to handle 401 responses
352 req.register_hook("response", self.handle_401)
353
354 return req
355
356 # Factored out to allow for easy patching in tests
357 def _prompt_for_password(self, netloc):
358 username = ask_input("User for %s: " % netloc)
359 if not username:
360 return None, None
361 auth = _get_keyring_auth(netloc, username)
362 if auth:
363 return auth[0], auth[1], False
364 password = ask_password("Password: ")
365 return username, password, True
366
367 # Factored out to allow for easy patching in tests
368 def _should_save_password_to_keyring(self):
369 if not keyring:
370 return False
371 return ask("Save credentials to keyring [y/N]: ", ["y", "n"]) == "y"
372
373 def handle_401(self, resp, **kwargs):
374 # We only care about 401 responses; anything else we want to just
375 # pass through the actual response
376 if resp.status_code != 401:
377 return resp
378
379 # We are not able to prompt the user so simply return the response
380 if not self.prompting:
381 return resp
382
383 parsed = urllib_parse.urlparse(resp.url)
384
385 # Prompt the user for a new username and password
386 username, password, save = self._prompt_for_password(parsed.netloc)
387
388 # Store the new username and password to use for future requests
389 self._credentials_to_save = None
390 if username is not None and password is not None:
391 self.passwords[parsed.netloc] = (username, password)
392
393 # Prompt to save the password to keyring
394 if save and self._should_save_password_to_keyring():
395 self._credentials_to_save = (parsed.netloc, username, password)
396
397 # Consume content and release the original connection to allow our new
398 # request to reuse the same one.
399 resp.content
400 resp.raw.release_conn()
401
402 # Add our new username and password to the request
403 req = HTTPBasicAuth(username or "", password or "")(resp.request)
404 req.register_hook("response", self.warn_on_401)
405
406 # On successful request, save the credentials that were used to
407 # keyring. (Note that if the user responded "no" above, this member
408 # is not set and nothing will be saved.)
409 if self._credentials_to_save:
410 req.register_hook("response", self.save_credentials)
411
412 # Send our new request
413 new_resp = resp.connection.send(req, **kwargs)
414 new_resp.history.append(resp)
415
416 return new_resp
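# Recap of the 401 flow above: the failed response is drained so its
# connection can be reused, the request is re-signed with the newly prompted
# credentials, hooks are attached to warn on a repeated 401 and (optionally)
# save to keyring, and the retried response gets the original 401 appended
# to its .history.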
417
418 def warn_on_401(self, resp, **kwargs):
419 """Response callback to warn about incorrect credentials."""
420 if resp.status_code == 401:
421 logger.warning('401 Error, Credentials not correct for %s',
422 resp.request.url)
423
424 def save_credentials(self, resp, **kwargs):
425 """Response callback to save credentials on success."""
426 assert keyring is not None, "should never reach here without keyring"
427 if not keyring:
428 return
429
430 creds = self._credentials_to_save
431 self._credentials_to_save = None
432 if creds and resp.status_code < 400:
433 try:
434 logger.info('Saving credentials to keyring')
435 keyring.set_password(*creds)
436 except Exception:
437 logger.exception('Failed to save credentials')
438
439
440 class LocalFSAdapter(BaseAdapter):
441
442 def send(self, request, stream=None, timeout=None, verify=None, cert=None,
443 proxies=None):
444 pathname = url_to_path(request.url)
445
446 resp = Response()
447 resp.status_code = 200
448 resp.url = request.url
449
450 try:
451 stats = os.stat(pathname)
452 except OSError as exc:
453 resp.status_code = 404
454 resp.raw = exc
455 else:
456 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
457 content_type = mimetypes.guess_type(pathname)[0] or "text/plain"
458 resp.headers = CaseInsensitiveDict({
459 "Content-Type": content_type,
460 "Content-Length": stats.st_size,
461 "Last-Modified": modified,
462 })
463
464 resp.raw = open(pathname, "rb")
465 resp.close = resp.raw.close
466
467 return resp
468
469 def close(self):
470 pass
471
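# A hedged usage sketch: mounted on a requests.Session (as PipSession does
# below for "file://" URLs), this adapter serves local files as synthetic
# HTTP responses.
#
#     import requests
#     s = requests.Session()
#     s.mount("file://", LocalFSAdapter())
#     r = s.get("file:///tmp/example.txt")  # hypothetical local path
#     print(r.status_code, r.headers.get("Content-Type"))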
472
473 class SafeFileCache(FileCache):
474 """
475 A file-based cache which is safe to use even when the target directory may
476 not be accessible or writable.
477 """
478
479 def __init__(self, *args, **kwargs):
480 super(SafeFileCache, self).__init__(*args, **kwargs)
481
482 # Check to ensure that the directory containing our cache directory
483 # is owned by the user currently executing pip. If it does not exist
484 # we will check the parent directory until we find one that does exist.
485 # If it is not owned by the user executing pip then we will disable
486 # the cache and log a warning.
487 if not check_path_owner(self.directory):
488 logger.warning(
489 "The directory '%s' or its parent directory is not owned by "
490 "the current user and the cache has been disabled. Please "
491 "check the permissions and owner of that directory. If "
492 "executing pip with sudo, you may want sudo's -H flag.",
493 self.directory,
494 )
495
496 # Set our directory to None to disable the Cache
497 self.directory = None
498
499 def get(self, *args, **kwargs):
500 # If we don't have a directory, then the cache should be a no-op.
501 if self.directory is None:
502 return
503
504 try:
505 return super(SafeFileCache, self).get(*args, **kwargs)
506 except (LockError, OSError, IOError):
507 # We intentionally silence this error; if we can't access the cache,
508 # we can just skip caching and process the request as if
509 # caching wasn't enabled.
510 pass
511
512 def set(self, *args, **kwargs):
513 # If we don't have a directory, then the cache should be a no-op.
514 if self.directory is None:
515 return
516
517 try:
518 return super(SafeFileCache, self).set(*args, **kwargs)
519 except (LockError, OSError, IOError):
520 # We intentionally silence this error; if we can't access the cache,
521 # we can just skip caching and process the request as if
522 # caching wasn't enabled.
523 pass
524
525 def delete(self, *args, **kwargs):
526 # If we don't have a directory, then the cache should be a no-op.
527 if self.directory is None:
528 return
529
530 try:
531 return super(SafeFileCache, self).delete(*args, **kwargs)
532 except (LockError, OSError, IOError):
533 # We intentionally silence this error; if we can't access the cache,
534 # we can just skip caching and process the request as if
535 # caching wasn't enabled.
536 pass
537
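# Sketch of the failure-tolerant behaviour (the path is hypothetical and
# assumed not to be owned by the current user):
#
#     cache = SafeFileCache("/path/not/owned/by/us", use_dir_lock=True)
#     cache.set("key", b"value")  # silently becomes a no-op
#     cache.get("key")            # -> None instead of raising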
538
539 class InsecureHTTPAdapter(HTTPAdapter):
540
541 def cert_verify(self, conn, url, verify, cert):
542 conn.cert_reqs = 'CERT_NONE'
543 conn.ca_certs = None
544
545
546 class PipSession(requests.Session):
547
548 timeout = None # type: Optional[int]
549
550 def __init__(self, *args, **kwargs):
551 retries = kwargs.pop("retries", 0)
552 cache = kwargs.pop("cache", None)
553 insecure_hosts = kwargs.pop("insecure_hosts", [])
554 index_urls = kwargs.pop("index_urls", None)
555
556 super(PipSession, self).__init__(*args, **kwargs)
557
558 # Attach our User Agent to the request
559 self.headers["User-Agent"] = user_agent()
560
561 # Attach our Authentication handler to the session
562 self.auth = MultiDomainBasicAuth(index_urls=index_urls)
563
564 # Create our urllib3.Retry instance which will allow us to customize
565 # how we handle retries.
566 retries = urllib3.Retry(
567 # Set the total number of retries that a particular request can
568 # have.
569 total=retries,
570
571 # A 503 error from PyPI typically means that the Fastly -> Origin
572 # connection got interrupted in some way. A 503 error in general
573 # is typically considered a transient error so we'll go ahead and
574 # retry it.
575 # A 500 may indicate transient error in Amazon S3
576 # A 520 or 527 - may indicate transient error in CloudFlare
577 status_forcelist=[500, 503, 520, 527],
578
579 # Add a small amount of back off between failed requests in
580 # order to prevent hammering the service.
581 backoff_factor=0.25,
582 )
583
584 # We want to _only_ cache responses on securely fetched origins. We do
585 # this because we can't validate the response of an insecurely fetched
586 # origin, and we don't want someone to be able to poison the cache and
587 # require manual eviction from the cache to fix it.
588 if cache:
589 secure_adapter = CacheControlAdapter(
590 cache=SafeFileCache(cache, use_dir_lock=True),
591 max_retries=retries,
592 )
593 else:
594 secure_adapter = HTTPAdapter(max_retries=retries)
595
596 # Our Insecure HTTPAdapter disables HTTPS validation. It does not
597 # support caching (see above) so we'll use it for all http:// URLs as
598 # well as any https:// host that we've marked as ignoring TLS errors
599 # for.
600 insecure_adapter = InsecureHTTPAdapter(max_retries=retries)
601 # Save this for later use in add_insecure_host().
602 self._insecure_adapter = insecure_adapter
603
604 self.mount("https://", secure_adapter)
605 self.mount("http://", insecure_adapter)
606
607 # Enable file:// urls
608 self.mount("file://", LocalFSAdapter())
609
610 # We want to use a non-validating adapter for any requests which are
611 # deemed insecure.
612 for host in insecure_hosts:
613 self.add_insecure_host(host)
614
615 def add_insecure_host(self, host):
616 # type: (str) -> None
617 self.mount('https://{}/'.format(host), self._insecure_adapter)
618
619 def request(self, method, url, *args, **kwargs):
620 # Allow setting a default timeout on a session
621 kwargs.setdefault("timeout", self.timeout)
622
623 # Dispatch the actual request
624 return super(PipSession, self).request(method, url, *args, **kwargs)
625
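# Typical construction, a sketch mirroring how pip's CLI wires this up (all
# values below are illustrative):
#
#     session = PipSession(
#         retries=3,
#         cache="/tmp/pip-http-cache",          # hypothetical cache dir
#         insecure_hosts=["internal.example"],  # hypothetical host
#         index_urls=["https://pypi.org/simple"],
#     )
#     session.timeout = 15
#     resp = session.get("https://pypi.org/simple/")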
626
627 def get_file_content(url, comes_from=None, session=None):
628 # type: (str, Optional[str], Optional[PipSession]) -> Tuple[str, Text]
629 """Gets the content of a file; it may be a filename, file: URL, or
630 http: URL. Returns (location, content). Content is unicode.
631
632 :param url: File path or url.
633 :param comes_from: Origin description of requirements.
634 :param session: Instance of pip.download.PipSession.
635 """
636 if session is None:
637 raise TypeError(
638 "get_file_content() missing 1 required keyword argument: 'session'"
639 )
640
641 match = _scheme_re.search(url)
642 if match:
643 scheme = match.group(1).lower()
644 if (scheme == 'file' and comes_from and
645 comes_from.startswith('http')):
646 raise InstallationError(
647 'Requirements file %s references URL %s, which is local'
648 % (comes_from, url))
649 if scheme == 'file':
650 path = url.split(':', 1)[1]
651 path = path.replace('\\', '/')
652 match = _url_slash_drive_re.match(path)
653 if match:
654 path = match.group(1) + ':' + path.split('|', 1)[1]
655 path = urllib_parse.unquote(path)
656 if path.startswith('/'):
657 path = '/' + path.lstrip('/')
658 url = path
659 else:
660 # FIXME: catch some errors
661 resp = session.get(url)
662 resp.raise_for_status()
663 return resp.url, resp.text
664 try:
665 with open(url, 'rb') as f:
666 content = auto_decode(f.read())
667 except IOError as exc:
668 raise InstallationError(
669 'Could not open requirements file: %s' % str(exc)
670 )
671 return url, content
672
673
674 _scheme_re = re.compile(r'^(http|https|file):', re.I)
675 _url_slash_drive_re = re.compile(r'/*([a-z])\|', re.I)
676
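# Calling sketch for get_file_content (file name and URL are hypothetical):
#
#     url, content = get_file_content("requirements.txt", session=PipSession())
#     url, content = get_file_content("https://example.com/requirements.txt",
#                                     session=PipSession())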
677
678 def is_url(name):
679 # type: (Union[str, Text]) -> bool
680 """Returns true if the name looks like a URL"""
681 if ':' not in name:
682 return False
683 scheme = name.split(':', 1)[0].lower()
684 return scheme in ['http', 'https', 'file', 'ftp'] + vcs.all_schemes
685
686
687 def url_to_path(url):
688 # type: (str) -> str
689 """
690 Convert a file: URL to a path.
691 """
692 assert url.startswith('file:'), (
693 "You can only turn file: urls into filenames (not %r)" % url)
694
695 _, netloc, path, _, _ = urllib_parse.urlsplit(url)
696
697 if not netloc or netloc == 'localhost':
698 # According to RFC 8089, same as empty authority.
699 netloc = ''
700 elif sys.platform == 'win32':
701 # If we have a UNC path, prepend UNC share notation.
702 netloc = '\\\\' + netloc
703 else:
704 raise ValueError(
705 'non-local file URIs are not supported on this platform: %r'
706 % url
707 )
708
709 path = urllib_request.url2pathname(netloc + path)
710 return path
711
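# Illustration of the mapping above (first form POSIX, second Windows UNC):
#
#     url_to_path("file:///home/user/pkg.tar.gz")  # -> "/home/user/pkg.tar.gz"
#     url_to_path("file://server/share/pkg.whl")   # -> r"\\server\share\pkg.whl" on win32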
712
713 def is_archive_file(name):
714 # type: (str) -> bool
715 """Return True if `name` is a considered as an archive file."""
716 ext = splitext(name)[1].lower()
717 if ext in ARCHIVE_EXTENSIONS:
718 return True
719 return False
720
721
722 def unpack_vcs_link(link, location):
723 vcs_backend = _get_used_vcs_backend(link)
724 vcs_backend.unpack(location, url=link.url)
725
726
727 def _get_used_vcs_backend(link):
728 # type: (Link) -> Optional[VersionControl]
729 """
730 Return a VersionControl object or None.
731 """
732 for vcs_backend in vcs.backends:
733 if link.scheme in vcs_backend.schemes:
734 return vcs_backend
735 return None
736
737
738 def is_vcs_url(link):
739 # type: (Link) -> bool
740 return bool(_get_used_vcs_backend(link))
741
742
743 def is_file_url(link):
744 # type: (Link) -> bool
745 return link.url.lower().startswith('file:')
746
747
748 def is_dir_url(link):
749 # type: (Link) -> bool
750 """Return whether a file:// Link points to a directory.
751
752 ``link`` must not have any other scheme but file://. Call is_file_url()
753 first.
754
755 """
756 link_path = url_to_path(link.url_without_fragment)
757 return os.path.isdir(link_path)
758
759
760 def _progress_indicator(iterable, *args, **kwargs):
761 return iterable
762
763
764 def _download_url(
765 resp, # type: Response
766 link, # type: Link
767 content_file, # type: IO
768 hashes, # type: Optional[Hashes]
769 progress_bar # type: str
770 ):
771 # type: (...) -> None
772 try:
773 total_length = int(resp.headers['content-length'])
774 except (ValueError, KeyError, TypeError):
775 total_length = 0
776
777 cached_resp = getattr(resp, "from_cache", False)
778 if logger.getEffectiveLevel() > logging.INFO:
779 show_progress = False
780 elif cached_resp:
781 show_progress = False
782 elif total_length > (40 * 1000):
783 show_progress = True
784 elif not total_length:
785 show_progress = True
786 else:
787 show_progress = False
788
789 show_url = link.show_url
790
791 def resp_read(chunk_size):
792 try:
793 # Special case for urllib3.
794 for chunk in resp.raw.stream(
795 chunk_size,
796 # We use decode_content=False here because we don't
797 # want urllib3 to mess with the raw bytes we get
798 # from the server. If we decompress inside of
799 # urllib3 then we cannot verify the checksum
800 # because the checksum will be of the compressed
801 # file. This breakage will only occur if the
802 # server adds a Content-Encoding header, which
803 # depends on how the server was configured:
804 # - Some servers will notice that the file isn't a
805 # compressible file and will leave the file alone
806 # and with an empty Content-Encoding
807 # - Some servers will notice that the file is
808 # already compressed and will leave the file
809 # alone and will add a Content-Encoding: gzip
810 # header
811 # - Some servers won't notice anything at all and
812 # will take a file that's already been compressed
813 # and compress it again and set the
814 # Content-Encoding: gzip header
815 #
816 # By setting this not to decode automatically we
817 # hope to eliminate problems with the second case.
818 decode_content=False):
819 yield chunk
820 except AttributeError:
821 # Standard file-like object.
822 while True:
823 chunk = resp.raw.read(chunk_size)
824 if not chunk:
825 break
826 yield chunk
827
828 def written_chunks(chunks):
829 for chunk in chunks:
830 content_file.write(chunk)
831 yield chunk
832
833 progress_indicator = _progress_indicator
834
835 if link.netloc == PyPI.netloc:
836 url = show_url
837 else:
838 url = link.url_without_fragment
839
840 if show_progress: # We don't show progress on cached responses
841 progress_indicator = DownloadProgressProvider(progress_bar,
842 max=total_length)
843 if total_length:
844 logger.info("Downloading %s (%s)", url, format_size(total_length))
845 else:
846 logger.info("Downloading %s", url)
847 elif cached_resp:
848 logger.info("Using cached %s", url)
849 else:
850 logger.info("Downloading %s", url)
851
852 logger.debug('Downloading from URL %s', link)
853
854 downloaded_chunks = written_chunks(
855 progress_indicator(
856 resp_read(CONTENT_CHUNK_SIZE),
857 CONTENT_CHUNK_SIZE
858 )
859 )
860 if hashes:
861 hashes.check_against_chunks(downloaded_chunks)
862 else:
863 consume(downloaded_chunks)
864
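# Pipeline recap for _download_url above: resp_read yields raw, undecoded
# chunks; the progress indicator passes them through while rendering;
# written_chunks tees every chunk into content_file; the final consumer is
# either the hash checker or a plain consume() that drains the generator.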
865
866 def _copy_file(filename, location, link):
867 copy = True
868 download_location = os.path.join(location, link.filename)
869 if os.path.exists(download_location):
870 response = ask_path_exists(
871 'The file %s exists. (i)gnore, (w)ipe, (b)ackup, (a)bort' %
872 display_path(download_location), ('i', 'w', 'b', 'a'))
873 if response == 'i':
874 copy = False
875 elif response == 'w':
876 logger.warning('Deleting %s', display_path(download_location))
877 os.remove(download_location)
878 elif response == 'b':
879 dest_file = backup_dir(download_location)
880 logger.warning(
881 'Backing up %s to %s',
882 display_path(download_location),
883 display_path(dest_file),
884 )
885 shutil.move(download_location, dest_file)
886 elif response == 'a':
887 sys.exit(-1)
888 if copy:
889 shutil.copy(filename, download_location)
890 logger.info('Saved %s', display_path(download_location))
891
892
893 def unpack_http_url(
894 link, # type: Link
895 location, # type: str
896 download_dir=None, # type: Optional[str]
897 session=None, # type: Optional[PipSession]
898 hashes=None, # type: Optional[Hashes]
899 progress_bar="on" # type: str
900 ):
901 # type: (...) -> None
902 if session is None:
903 raise TypeError(
904 "unpack_http_url() missing 1 required keyword argument: 'session'"
905 )
906
907 with TempDirectory(kind="unpack") as temp_dir:
908 # If a download dir is specified, is the file already downloaded there?
909 already_downloaded_path = None
910 if download_dir:
911 already_downloaded_path = _check_download_dir(link,
912 download_dir,
913 hashes)
914
915 if already_downloaded_path:
916 from_path = already_downloaded_path
917 content_type = mimetypes.guess_type(from_path)[0]
918 else:
919 # let's download to a tmp dir
920 from_path, content_type = _download_http_url(link,
921 session,
922 temp_dir.path,
923 hashes,
924 progress_bar)
925
926 # unpack the archive to the build dir location. even when only
927 # downloading archives, they have to be unpacked to parse dependencies
928 unpack_file(from_path, location, content_type, link)
929
930 # a download dir is specified; let's copy the archive there
931 if download_dir and not already_downloaded_path:
932 _copy_file(from_path, download_dir, link)
933
934 if not already_downloaded_path:
935 os.unlink(from_path)
936
937
938 def unpack_file_url(
939 link, # type: Link
940 location, # type: str
941 download_dir=None, # type: Optional[str]
942 hashes=None # type: Optional[Hashes]
943 ):
944 # type: (...) -> None
945 """Unpack link into location.
946
947 If download_dir is provided and link points to a file, make a copy
948 of the link file inside download_dir.
949 """
950 link_path = url_to_path(link.url_without_fragment)
951
952 # If it's a url to a local directory
953 if is_dir_url(link):
954 if os.path.isdir(location):
955 rmtree(location)
956 shutil.copytree(link_path, location, symlinks=True)
957 if download_dir:
958 logger.info('Link is a directory, ignoring download_dir')
959 return
960
961 # If --require-hashes is off, `hashes` is either empty, the
962 # link's embedded hash, or MissingHashes; it is required to
963 # match. If --require-hashes is on, we are satisfied by any
964 # hash in `hashes` matching: a URL-based or an option-based
965 # one; no internet-sourced hash will be in `hashes`.
966 if hashes:
967 hashes.check_against_path(link_path)
968
969 # If a download dir is specified, is the file already there and valid?
970 already_downloaded_path = None
971 if download_dir:
972 already_downloaded_path = _check_download_dir(link,
973 download_dir,
974 hashes)
975
976 if already_downloaded_path:
977 from_path = already_downloaded_path
978 else:
979 from_path = link_path
980
981 content_type = mimetypes.guess_type(from_path)[0]
982
983 # unpack the archive to the build dir location. even when only downloading
984 # archives, they have to be unpacked to parse dependencies
985 unpack_file(from_path, location, content_type, link)
986
987 # a download dir is specified and not already downloaded
988 if download_dir and not already_downloaded_path:
989 _copy_file(from_path, download_dir, link)
990
991
992 class PipXmlrpcTransport(xmlrpc_client.Transport):
993 """Provide a `xmlrpclib.Transport` implementation via a `PipSession`
994 object.
995 """
996
997 def __init__(self, index_url, session, use_datetime=False):
998 xmlrpc_client.Transport.__init__(self, use_datetime)
999 index_parts = urllib_parse.urlparse(index_url)
1000 self._scheme = index_parts.scheme
1001 self._session = session
1002
1003 def request(self, host, handler, request_body, verbose=False):
1004 parts = (self._scheme, host, handler, None, None, None)
1005 url = urllib_parse.urlunparse(parts)
1006 try:
1007 headers = {'Content-Type': 'text/xml'}
1008 response = self._session.post(url, data=request_body,
1009 headers=headers, stream=True)
1010 response.raise_for_status()
1011 self.verbose = verbose
1012 return self.parse_response(response.raw)
1013 except requests.HTTPError as exc:
1014 logger.critical(
1015 "HTTP error %s while getting %s",
1016 exc.response.status_code, url,
1017 )
1018 raise
1019
1020
1021 def unpack_url(
1022 link, # type: Link
1023 location, # type: str
1024 download_dir=None, # type: Optional[str]
1025 only_download=False, # type: bool
1026 session=None, # type: Optional[PipSession]
1027 hashes=None, # type: Optional[Hashes]
1028 progress_bar="on" # type: str
1029 ):
1030 # type: (...) -> None
1031 """Unpack link.
1032 If link is a VCS link:
1033 if only_download, export into download_dir and ignore location
1034 else unpack into location
1035 for other types of link:
1036 - unpack into location
1037 - if download_dir, copy the file into download_dir
1038 - if only_download, mark location for deletion
1039
1040 :param hashes: A Hashes object, one of whose embedded hashes must match,
1041 or HashMismatch will be raised. If the Hashes is empty, no matches are
1042 required, and unhashable types of requirements (like VCS ones, which
1043 would ordinarily raise HashUnsupported) are allowed.
1044 """
1045 # non-editable vcs urls
1046 if is_vcs_url(link):
1047 unpack_vcs_link(link, location)
1048
1049 # file urls
1050 elif is_file_url(link):
1051 unpack_file_url(link, location, download_dir, hashes=hashes)
1052
1053 # http urls
1054 else:
1055 if session is None:
1056 session = PipSession()
1057
1058 unpack_http_url(
1059 link,
1060 location,
1061 download_dir,
1062 session,
1063 hashes=hashes,
1064 progress_bar=progress_bar
1065 )
1066 if only_download:
1067 write_delete_marker_file(location)
1068
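# Dispatch recap for unpack_url above: VCS links (e.g. "git+https://...") go
# to unpack_vcs_link, "file:" links go to unpack_file_url, and everything
# else goes to unpack_http_url over a (possibly freshly created) PipSession.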
1069
1070 def sanitize_content_filename(filename):
1071 # type: (str) -> str
1072 """
1073 Sanitize the "filename" value from a Content-Disposition header.
1074 """
1075 return os.path.basename(filename)
1076
1077
1078 def parse_content_disposition(content_disposition, default_filename):
1079 # type: (str, str) -> str
1080 """
1081 Parse the "filename" value from a Content-Disposition header, and
1082 return the default filename if the result is empty.
1083 """
1084 _type, params = cgi.parse_header(content_disposition)
1085 filename = params.get('filename')
1086 if filename:
1087 # We need to sanitize the filename to prevent directory traversal
1088 # in case the filename contains ".." path parts.
1089 filename = sanitize_content_filename(filename)
1090 return filename or default_filename
1091
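# Examples of the parsing and sanitizing above (header values illustrative):
#
#     parse_content_disposition('attachment; filename="pkg-1.0.tar.gz"', "dflt")
#     # -> "pkg-1.0.tar.gz"
#     parse_content_disposition('attachment; filename="../../evil.sh"', "dflt")
#     # -> "evil.sh"  (os.path.basename drops the traversal components)
#     parse_content_disposition('attachment', "dflt")
#     # -> "dflt"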
1092
1093 def _download_http_url(
1094 link, # type: Link
1095 session, # type: PipSession
1096 temp_dir, # type: str
1097 hashes, # type: Optional[Hashes]
1098 progress_bar # type: str
1099 ):
1100 # type: (...) -> Tuple[str, str]
1101 """Download link url into temp_dir using provided session"""
1102 target_url = link.url.split('#', 1)[0]
1103 try:
1104 resp = session.get(
1105 target_url,
1106 # We use Accept-Encoding: identity here because requests
1107 # defaults to accepting compressed responses. This breaks in
1108 # a variety of ways depending on how the server is configured.
1109 # - Some servers will notice that the file isn't a compressible
1110 # file and will leave the file alone and with an empty
1111 # Content-Encoding
1112 # - Some servers will notice that the file is already
1113 # compressed and will leave the file alone and will add a
1114 # Content-Encoding: gzip header
1115 # - Some servers won't notice anything at all and will take
1116 # a file that's already been compressed and compress it again
1117 # and set the Content-Encoding: gzip header
1118 # By setting this to request only the identity encoding we're
1119 # hoping to eliminate the third case. Hopefully there does not
1120 # exist a server which when given a file will notice it is
1121 # already compressed and that you're not asking for a
1122 # compressed file and will then decompress it before sending
1123 # because if that's the case I don't think it'll ever be
1124 # possible to make this work.
1125 headers={"Accept-Encoding": "identity"},
1126 stream=True,
1127 )
1128 resp.raise_for_status()
1129 except requests.HTTPError as exc:
1130 logger.critical(
1131 "HTTP error %s while getting %s", exc.response.status_code, link,
1132 )
1133 raise
1134
1135 content_type = resp.headers.get('content-type', '')
1136 filename = link.filename # fallback
1137 # Have a look at the Content-Disposition header for a better guess
1138 content_disposition = resp.headers.get('content-disposition')
1139 if content_disposition:
1140 filename = parse_content_disposition(content_disposition, filename)
1141 ext = splitext(filename)[1] # type: Optional[str]
1142 if not ext:
1143 ext = mimetypes.guess_extension(content_type)
1144 if ext:
1145 filename += ext
1146 if not ext and link.url != resp.url:
1147 ext = os.path.splitext(resp.url)[1]
1148 if ext:
1149 filename += ext
1150 file_path = os.path.join(temp_dir, filename)
1151 with open(file_path, 'wb') as content_file:
1152 _download_url(resp, link, content_file, hashes, progress_bar)
1153 return file_path, content_type
1154
1155
1156 def _check_download_dir(link, download_dir, hashes):
1157 # type: (Link, str, Optional[Hashes]) -> Optional[str]
1158 """ Check download_dir for previously downloaded file with correct hash
1159 If a correct file is found return its path else None
1160 """
1161 download_path = os.path.join(download_dir, link.filename)
1162 if os.path.exists(download_path):
1163 # If already downloaded, does its hash match?
1164 logger.info('File was already downloaded %s', download_path)
1165 if hashes:
1166 try:
1167 hashes.check_against_path(download_path)
1168 except HashMismatch:
1169 logger.warning(
1170 'Previously-downloaded file %s has bad hash. '
1171 'Re-downloading.',
1172 download_path
1173 )
1174 os.unlink(download_path)
1175 return None
1176 return download_path
1177 return None