springsuite (Mercurial repository, guerler): comparison view of planemo/lib/python3.7/site-packages/pip/_internal/download.py @ 1:56ad4e20f292 (draft)
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children | |
| parent changeset | this changeset |
|---|---|
| 0:d30785e31577 | 1:56ad4e20f292 |

The comparison shows the file as inserted in this changeset; its full content follows.

```python
from __future__ import absolute_import

import cgi
import email.utils
import json
import logging
import mimetypes
import os
import platform
import re
import shutil
import sys

from pip._vendor import requests, urllib3
from pip._vendor.cachecontrol import CacheControlAdapter
from pip._vendor.cachecontrol.caches import FileCache
from pip._vendor.lockfile import LockError
from pip._vendor.requests.adapters import BaseAdapter, HTTPAdapter
from pip._vendor.requests.auth import AuthBase, HTTPBasicAuth
from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response
from pip._vendor.requests.structures import CaseInsensitiveDict
from pip._vendor.requests.utils import get_netrc_auth
# NOTE: XMLRPC Client is not annotated in typeshed as on 2017-07-17, which is
# why we ignore the type on this import
from pip._vendor.six.moves import xmlrpc_client  # type: ignore
from pip._vendor.six.moves.urllib import parse as urllib_parse
from pip._vendor.six.moves.urllib import request as urllib_request

import pip
from pip._internal.exceptions import HashMismatch, InstallationError
from pip._internal.models.index import PyPI
# Import ssl from compat so the initial import occurs in only one place.
from pip._internal.utils.compat import HAS_TLS, ssl
from pip._internal.utils.encoding import auto_decode
from pip._internal.utils.filesystem import check_path_owner
from pip._internal.utils.glibc import libc_ver
from pip._internal.utils.marker_files import write_delete_marker_file
from pip._internal.utils.misc import (
    ARCHIVE_EXTENSIONS, ask, ask_input, ask_password, ask_path_exists,
    backup_dir, consume, display_path, format_size, get_installed_version,
    path_to_url, remove_auth_from_url, rmtree, split_auth_netloc_from_url,
    splitext, unpack_file,
)
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.typing import MYPY_CHECK_RUNNING
from pip._internal.utils.ui import DownloadProgressProvider
from pip._internal.vcs import vcs

if MYPY_CHECK_RUNNING:
    from typing import (
        Optional, Tuple, Dict, IO, Text, Union
    )
    from optparse import Values
    from pip._internal.models.link import Link
    from pip._internal.utils.hashes import Hashes
    from pip._internal.vcs.versioncontrol import AuthInfo, VersionControl

    Credentials = Tuple[str, str, str]


__all__ = ['get_file_content',
           'is_url', 'url_to_path', 'path_to_url',
           'is_archive_file', 'unpack_vcs_link',
           'unpack_file_url', 'is_vcs_url', 'is_file_url',
           'unpack_http_url', 'unpack_url',
           'parse_content_disposition', 'sanitize_content_filename']


logger = logging.getLogger(__name__)


try:
    import keyring  # noqa
except ImportError:
    keyring = None
except Exception as exc:
    logger.warning("Keyring is skipped due to an exception: %s",
                   str(exc))
    keyring = None

# These are environment variables present when running under various
# CI systems. For each variable, some CI systems that use the variable
# are indicated. The collection was chosen so that for each of a number
# of popular systems, at least one of the environment variables is used.
# This list is used to provide some indication of and lower bound for
# CI traffic to PyPI. Thus, it is okay if the list is not comprehensive.
# For more background, see: https://github.com/pypa/pip/issues/5499
CI_ENVIRONMENT_VARIABLES = (
    # Azure Pipelines
    'BUILD_BUILDID',
    # Jenkins
    'BUILD_ID',
    # AppVeyor, CircleCI, Codeship, Gitlab CI, Shippable, Travis CI
    'CI',
    # Explicit environment variable.
    'PIP_IS_CI',
)


def looks_like_ci():
    # type: () -> bool
    """
    Return whether it looks like pip is running under CI.
    """
    # We don't use the method of checking for a tty (e.g. using isatty())
    # because some CI systems mimic a tty (e.g. Travis CI). Thus that
    # method doesn't provide definitive information in either direction.
    return any(name in os.environ for name in CI_ENVIRONMENT_VARIABLES)
```
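
Since `looks_like_ci()` keys purely off environment variables, it can be exercised directly. A minimal sketch, assuming this module is importable as `pip._internal.download` as vendored in this tree:

```python
import os

from pip._internal.download import looks_like_ci

# Clear all indicator variables for the demo, then flip the explicit one.
for name in ('BUILD_BUILDID', 'BUILD_ID', 'CI', 'PIP_IS_CI'):
    os.environ.pop(name, None)
print(looks_like_ci())        # False
os.environ['PIP_IS_CI'] = '1'
print(looks_like_ci())        # True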
```python
def user_agent():
    """
    Return a string representing the user agent.
    """
    data = {
        "installer": {"name": "pip", "version": pip.__version__},
        "python": platform.python_version(),
        "implementation": {
            "name": platform.python_implementation(),
        },
    }

    if data["implementation"]["name"] == 'CPython':
        data["implementation"]["version"] = platform.python_version()
    elif data["implementation"]["name"] == 'PyPy':
        if sys.pypy_version_info.releaselevel == 'final':
            pypy_version_info = sys.pypy_version_info[:3]
        else:
            pypy_version_info = sys.pypy_version_info
        data["implementation"]["version"] = ".".join(
            [str(x) for x in pypy_version_info]
        )
    elif data["implementation"]["name"] == 'Jython':
        # Complete Guess
        data["implementation"]["version"] = platform.python_version()
    elif data["implementation"]["name"] == 'IronPython':
        # Complete Guess
        data["implementation"]["version"] = platform.python_version()

    if sys.platform.startswith("linux"):
        from pip._vendor import distro
        distro_infos = dict(filter(
            lambda x: x[1],
            zip(["name", "version", "id"], distro.linux_distribution()),
        ))
        libc = dict(filter(
            lambda x: x[1],
            zip(["lib", "version"], libc_ver()),
        ))
        if libc:
            distro_infos["libc"] = libc
        if distro_infos:
            data["distro"] = distro_infos

    if sys.platform.startswith("darwin") and platform.mac_ver()[0]:
        data["distro"] = {"name": "macOS", "version": platform.mac_ver()[0]}

    if platform.system():
        data.setdefault("system", {})["name"] = platform.system()

    if platform.release():
        data.setdefault("system", {})["release"] = platform.release()

    if platform.machine():
        data["cpu"] = platform.machine()

    if HAS_TLS:
        data["openssl_version"] = ssl.OPENSSL_VERSION

    setuptools_version = get_installed_version("setuptools")
    if setuptools_version is not None:
        data["setuptools_version"] = setuptools_version

    # Use None rather than False so as not to give the impression that
    # pip knows it is not being run under CI. Rather, it is a null or
    # inconclusive result. Also, we include some value rather than no
    # value to make it easier to know that the check has been run.
    data["ci"] = True if looks_like_ci() else None

    user_data = os.environ.get("PIP_USER_AGENT_USER_DATA")
    if user_data is not None:
        data["user_data"] = user_data

    return "{data[installer][name]}/{data[installer][version]} {json}".format(
        data=data,
        json=json.dumps(data, separators=(",", ":"), sort_keys=True),
    )
```
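
The returned string is the `pip/<version>` token followed by a JSON payload, so the metadata can be parsed back out of the header. A small illustrative sketch (same import-path assumption as above; the printed shape depends on the running interpreter):

```python
import json

from pip._internal.download import user_agent

ua = user_agent()                 # e.g. 'pip/19.1 {"ci":null,...}'
token, _, payload = ua.partition(" ")  # split at the first space only
meta = json.loads(payload)
print(token)                      # 'pip/<version>'
print(meta["python"], meta["implementation"]["name"])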
```python
def _get_keyring_auth(url, username):
    """Return the tuple auth for a given url from keyring."""
    if not url or not keyring:
        return None

    try:
        try:
            get_credential = keyring.get_credential
        except AttributeError:
            pass
        else:
            logger.debug("Getting credentials from keyring for %s", url)
            cred = get_credential(url, username)
            if cred is not None:
                return cred.username, cred.password
            return None

        if username:
            logger.debug("Getting password from keyring for %s", url)
            password = keyring.get_password(url, username)
            if password:
                return username, password

    except Exception as exc:
        logger.warning("Keyring is skipped due to an exception: %s",
                       str(exc))


class MultiDomainBasicAuth(AuthBase):

    def __init__(self, prompting=True, index_urls=None):
        # type: (bool, Optional[Values]) -> None
        self.prompting = prompting
        self.index_urls = index_urls
        self.passwords = {}  # type: Dict[str, AuthInfo]
        # When the user is prompted to enter credentials and keyring is
        # available, we will offer to save them. If the user accepts,
        # this value is set to the credentials they entered. After the
        # request authenticates, the caller should call
        # ``save_credentials`` to save these.
        self._credentials_to_save = None  # type: Optional[Credentials]

    def _get_index_url(self, url):
        """Return the original index URL matching the requested URL.

        Cached or dynamically generated credentials may work against
        the original index URL rather than just the netloc.

        The provided url should have had its username and password
        removed already. If the original index url had credentials then
        they will be included in the return value.

        Returns None if no matching index was found, or if --no-index
        was specified by the user.
        """
        if not url or not self.index_urls:
            return None

        for u in self.index_urls:
            prefix = remove_auth_from_url(u).rstrip("/") + "/"
            if url.startswith(prefix):
                return u

    def _get_new_credentials(self, original_url, allow_netrc=True,
                             allow_keyring=True):
        """Find and return credentials for the specified URL."""
        # Split the credentials and netloc from the url.
        url, netloc, url_user_password = split_auth_netloc_from_url(
            original_url)

        # Start with the credentials embedded in the url
        username, password = url_user_password
        if username is not None and password is not None:
            logger.debug("Found credentials in url for %s", netloc)
            return url_user_password

        # Find a matching index url for this request
        index_url = self._get_index_url(url)
        if index_url:
            # Split the credentials from the url.
            index_info = split_auth_netloc_from_url(index_url)
            if index_info:
                index_url, _, index_url_user_password = index_info
                logger.debug("Found index url %s", index_url)

        # If an index URL was found, try its embedded credentials
        if index_url and index_url_user_password[0] is not None:
            username, password = index_url_user_password
            if username is not None and password is not None:
                logger.debug("Found credentials in index url for %s", netloc)
                return index_url_user_password

        # Get creds from netrc if we still don't have them
        if allow_netrc:
            netrc_auth = get_netrc_auth(original_url)
            if netrc_auth:
                logger.debug("Found credentials in netrc for %s", netloc)
                return netrc_auth

        # If we don't have a password and keyring is available, use it.
        if allow_keyring:
            # The index url is more specific than the netloc, so try it first
            kr_auth = (_get_keyring_auth(index_url, username) or
                       _get_keyring_auth(netloc, username))
            if kr_auth:
                logger.debug("Found credentials in keyring for %s", netloc)
                return kr_auth

        return username, password

    def _get_url_and_credentials(self, original_url):
        """Return the credentials to use for the provided URL.

        If allowed, netrc and keyring may be used to obtain the
        correct credentials.

        Returns (url_without_credentials, username, password). Note
        that even if the original URL contains credentials, this
        function may return a different username and password.
        """
        url, netloc, _ = split_auth_netloc_from_url(original_url)

        # Use any stored credentials that we have for this netloc
        username, password = self.passwords.get(netloc, (None, None))

        if username is None and password is None:
            # No stored credentials. Acquire new credentials without prompting
            # the user. (e.g. from netrc, keyring, or the URL itself)
            username, password = self._get_new_credentials(original_url)

        if username is not None or password is not None:
            # Convert the username and password if they're None, so that
            # this netloc will show up as "cached" in the conditional above.
            # Further, HTTPBasicAuth doesn't accept None, so it makes sense to
            # cache the value that is going to be used.
            username = username or ""
            password = password or ""

            # Store any acquired credentials.
            self.passwords[netloc] = (username, password)

        assert (
            # Credentials were found
            (username is not None and password is not None) or
            # Credentials were not found
            (username is None and password is None)
        ), "Could not load credentials from url: {}".format(original_url)

        return url, username, password

    def __call__(self, req):
        # Get credentials for this request
        url, username, password = self._get_url_and_credentials(req.url)

        # Set the url of the request to the url without any credentials
        req.url = url

        if username is not None and password is not None:
            # Send the basic auth with this request
            req = HTTPBasicAuth(username, password)(req)

        # Attach a hook to handle 401 responses
        req.register_hook("response", self.handle_401)

        return req

    # Factored out to allow for easy patching in tests
    def _prompt_for_password(self, netloc):
        username = ask_input("User for %s: " % netloc)
        if not username:
            # Return a 3-tuple like the other branches so that callers
            # unpacking (username, password, save) do not crash.
            return None, None, False
        auth = _get_keyring_auth(netloc, username)
        if auth:
            return auth[0], auth[1], False
        password = ask_password("Password: ")
        return username, password, True

    # Factored out to allow for easy patching in tests
    def _should_save_password_to_keyring(self):
        if not keyring:
            return False
        return ask("Save credentials to keyring [y/N]: ", ["y", "n"]) == "y"

    def handle_401(self, resp, **kwargs):
        # We only care about 401 responses, anything else we want to just
        # pass through the actual response
        if resp.status_code != 401:
            return resp

        # We are not able to prompt the user so simply return the response
        if not self.prompting:
            return resp

        parsed = urllib_parse.urlparse(resp.url)

        # Prompt the user for a new username and password
        username, password, save = self._prompt_for_password(parsed.netloc)

        # Store the new username and password to use for future requests
        self._credentials_to_save = None
        if username is not None and password is not None:
            self.passwords[parsed.netloc] = (username, password)

            # Prompt to save the password to keyring
            if save and self._should_save_password_to_keyring():
                self._credentials_to_save = (parsed.netloc, username, password)

        # Consume content and release the original connection to allow our new
        # request to reuse the same one.
        resp.content
        resp.raw.release_conn()

        # Add our new username and password to the request
        req = HTTPBasicAuth(username or "", password or "")(resp.request)
        req.register_hook("response", self.warn_on_401)

        # On successful request, save the credentials that were used to
        # keyring. (Note that if the user responded "no" above, this member
        # is not set and nothing will be saved.)
        if self._credentials_to_save:
            req.register_hook("response", self.save_credentials)

        # Send our new request
        new_resp = resp.connection.send(req, **kwargs)
        new_resp.history.append(resp)

        return new_resp

    def warn_on_401(self, resp, **kwargs):
        """Response callback to warn about incorrect credentials."""
        if resp.status_code == 401:
            logger.warning('401 Error, Credentials not correct for %s',
                           resp.request.url)

    def save_credentials(self, resp, **kwargs):
        """Response callback to save credentials on success."""
        assert keyring is not None, "should never reach here without keyring"
        if not keyring:
            return

        creds = self._credentials_to_save
        self._credentials_to_save = None
        if creds and resp.status_code < 400:
            try:
                logger.info('Saving credentials to keyring')
                keyring.set_password(*creds)
            except Exception:
                logger.exception('Failed to save credentials')
```
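
A rough usage sketch for the handler above (import path as in this tree; the index URL and its credentials are placeholders). Attached as `session.auth`, it strips credentials from request URLs and re-applies them as a Basic auth header, prompting on a 401 only when `prompting` is enabled:

```python
from pip._vendor import requests

from pip._internal.download import MultiDomainBasicAuth

session = requests.Session()
session.auth = MultiDomainBasicAuth(
    prompting=False,  # never block waiting for terminal input
    index_urls=["https://user:s3cret@pypi.example.org/simple/"],
)

# The credentials embedded in the matching index URL are looked up again
# for a bare URL on the same index and sent as a Basic auth header.
resp = session.get("https://pypi.example.org/simple/pip/")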
```python
class LocalFSAdapter(BaseAdapter):

    def send(self, request, stream=None, timeout=None, verify=None, cert=None,
             proxies=None):
        pathname = url_to_path(request.url)

        resp = Response()
        resp.status_code = 200
        resp.url = request.url

        try:
            stats = os.stat(pathname)
        except OSError as exc:
            resp.status_code = 404
            resp.raw = exc
        else:
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            content_type = mimetypes.guess_type(pathname)[0] or "text/plain"
            resp.headers = CaseInsensitiveDict({
                "Content-Type": content_type,
                "Content-Length": stats.st_size,
                "Last-Modified": modified,
            })

            resp.raw = open(pathname, "rb")
            resp.close = resp.raw.close

        return resp

    def close(self):
        pass
```
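
A quick sketch of the adapter in use (`path_to_url` comes from `pip._internal.utils.misc`, already imported at the top of this module; the file path is a placeholder):

```python
from pip._vendor import requests

from pip._internal.download import LocalFSAdapter
from pip._internal.utils.misc import path_to_url

session = requests.Session()
session.mount("file://", LocalFSAdapter())

url = path_to_url("/etc/hostname")       # any readable local file
resp = session.get(url)
print(resp.status_code)                  # 200, or 404 if the path is missing
print(resp.headers["Content-Type"])      # guessed from the filename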
```python
class SafeFileCache(FileCache):
    """
    A file based cache which is safe to use even when the target directory may
    not be accessible or writable.
    """

    def __init__(self, *args, **kwargs):
        super(SafeFileCache, self).__init__(*args, **kwargs)

        # Check to ensure that the directory containing our cache directory
        # is owned by the user currently executing pip. If it does not exist
        # we will check the parent directory until we find one that does exist.
        # If it is not owned by the user executing pip then we will disable
        # the cache and log a warning.
        if not check_path_owner(self.directory):
            logger.warning(
                "The directory '%s' or its parent directory is not owned by "
                "the current user and the cache has been disabled. Please "
                "check the permissions and owner of that directory. If "
                "executing pip with sudo, you may want sudo's -H flag.",
                self.directory,
            )

            # Set our directory to None to disable the Cache
            self.directory = None

    def get(self, *args, **kwargs):
        # If we don't have a directory, then the cache should be a no-op.
        if self.directory is None:
            return

        try:
            return super(SafeFileCache, self).get(*args, **kwargs)
        except (LockError, OSError, IOError):
            # We intentionally silence this error; if we can't access the
            # cache then we can just skip caching and process the request as
            # if caching wasn't enabled.
            pass

    def set(self, *args, **kwargs):
        # If we don't have a directory, then the cache should be a no-op.
        if self.directory is None:
            return

        try:
            return super(SafeFileCache, self).set(*args, **kwargs)
        except (LockError, OSError, IOError):
            # We intentionally silence this error; if we can't access the
            # cache then we can just skip caching and process the request as
            # if caching wasn't enabled.
            pass

    def delete(self, *args, **kwargs):
        # If we don't have a directory, then the cache should be a no-op.
        if self.directory is None:
            return

        try:
            return super(SafeFileCache, self).delete(*args, **kwargs)
        except (LockError, OSError, IOError):
            # We intentionally silence this error; if we can't access the
            # cache then we can just skip caching and process the request as
            # if caching wasn't enabled.
            pass


class InsecureHTTPAdapter(HTTPAdapter):

    def cert_verify(self, conn, url, verify, cert):
        conn.cert_reqs = 'CERT_NONE'
        conn.ca_certs = None
```
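
`SafeFileCache` is designed to drop into CacheControl wherever a `FileCache` would go. A minimal sketch (the cache directory is a placeholder), mirroring how `PipSession` wires it up below; if the ownership check fails, the cache silently degrades to a no-op instead of raising:

```python
from pip._vendor import requests
from pip._vendor.cachecontrol import CacheControlAdapter

from pip._internal.download import SafeFileCache

adapter = CacheControlAdapter(
    cache=SafeFileCache("/tmp/pip-http-cache", use_dir_lock=True),
)

session = requests.Session()
session.mount("https://", adapter)  # only cache securely fetched origins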
```python
class PipSession(requests.Session):

    timeout = None  # type: Optional[int]

    def __init__(self, *args, **kwargs):
        retries = kwargs.pop("retries", 0)
        cache = kwargs.pop("cache", None)
        insecure_hosts = kwargs.pop("insecure_hosts", [])
        index_urls = kwargs.pop("index_urls", None)

        super(PipSession, self).__init__(*args, **kwargs)

        # Attach our User Agent to the request
        self.headers["User-Agent"] = user_agent()

        # Attach our Authentication handler to the session
        self.auth = MultiDomainBasicAuth(index_urls=index_urls)

        # Create our urllib3.Retry instance which will allow us to customize
        # how we handle retries.
        retries = urllib3.Retry(
            # Set the total number of retries that a particular request can
            # have.
            total=retries,

            # A 503 error from PyPI typically means that the Fastly -> Origin
            # connection got interrupted in some way. A 503 error in general
            # is typically considered a transient error so we'll go ahead and
            # retry it.
            # A 500 may indicate transient error in Amazon S3
            # A 520 or 527 - may indicate transient error in CloudFlare
            status_forcelist=[500, 503, 520, 527],

            # Add a small amount of back off between failed requests in
            # order to prevent hammering the service.
            backoff_factor=0.25,
        )

        # We want to _only_ cache responses on securely fetched origins. We do
        # this because we can't validate the response of an insecurely fetched
        # origin, and we don't want someone to be able to poison the cache and
        # require manual eviction from the cache to fix it.
        if cache:
            secure_adapter = CacheControlAdapter(
                cache=SafeFileCache(cache, use_dir_lock=True),
                max_retries=retries,
            )
        else:
            secure_adapter = HTTPAdapter(max_retries=retries)

        # Our Insecure HTTPAdapter disables HTTPS validation. It does not
        # support caching (see above) so we'll use it for all http:// URLs as
        # well as any https:// host that we've marked as ignoring TLS errors
        # for.
        insecure_adapter = InsecureHTTPAdapter(max_retries=retries)
        # Save this for later use in add_insecure_host().
        self._insecure_adapter = insecure_adapter

        self.mount("https://", secure_adapter)
        self.mount("http://", insecure_adapter)

        # Enable file:// urls
        self.mount("file://", LocalFSAdapter())

        # We want to use a non-validating adapter for any requests which are
        # deemed insecure.
        for host in insecure_hosts:
            self.add_insecure_host(host)

    def add_insecure_host(self, host):
        # type: (str) -> None
        self.mount('https://{}/'.format(host), self._insecure_adapter)

    def request(self, method, url, *args, **kwargs):
        # Allow setting a default timeout on a session
        kwargs.setdefault("timeout", self.timeout)

        # Dispatch the actual request
        return super(PipSession, self).request(method, url, *args, **kwargs)
```
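
Putting the pieces together, a hedged usage sketch of `PipSession` (the keyword arguments shown are exactly the ones popped in `__init__` above; the cache path and host name are placeholders):

```python
from pip._internal.download import PipSession

session = PipSession(
    retries=3,                                  # urllib3.Retry total
    cache="/tmp/pip-http-cache",                # SafeFileCache directory
    insecure_hosts=["internal.example.org"],    # skip TLS verification here
    index_urls=["https://pypi.org/simple/"],    # for credential lookup
)
session.timeout = 15  # default timeout applied by request()

resp = session.get("https://pypi.org/simple/pip/")
resp.raise_for_status()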
```python
def get_file_content(url, comes_from=None, session=None):
    # type: (str, Optional[str], Optional[PipSession]) -> Tuple[str, Text]
    """Gets the content of a file; it may be a filename, file: URL, or
    http: URL. Returns (location, content). Content is unicode.

    :param url: File path or url.
    :param comes_from: Origin description of requirements.
    :param session: Instance of pip.download.PipSession.
    """
    if session is None:
        raise TypeError(
            "get_file_content() missing 1 required keyword argument: 'session'"
        )

    match = _scheme_re.search(url)
    if match:
        scheme = match.group(1).lower()
        if (scheme == 'file' and comes_from and
                comes_from.startswith('http')):
            raise InstallationError(
                'Requirements file %s references URL %s, which is local'
                % (comes_from, url))
        if scheme == 'file':
            path = url.split(':', 1)[1]
            path = path.replace('\\', '/')
            match = _url_slash_drive_re.match(path)
            if match:
                path = match.group(1) + ':' + path.split('|', 1)[1]
            path = urllib_parse.unquote(path)
            if path.startswith('/'):
                path = '/' + path.lstrip('/')
            url = path
        else:
            # FIXME: catch some errors
            resp = session.get(url)
            resp.raise_for_status()
            return resp.url, resp.text
    try:
        with open(url, 'rb') as f:
            content = auto_decode(f.read())
    except IOError as exc:
        raise InstallationError(
            'Could not open requirements file: %s' % str(exc)
        )
    return url, content


_scheme_re = re.compile(r'^(http|https|file):', re.I)
_url_slash_drive_re = re.compile(r'/*([a-z])\|', re.I)
```
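
Both call forms in one illustrative sketch (the file name and URL are placeholders; the `session` argument is mandatory, as enforced above):

```python
from pip._internal.download import PipSession, get_file_content

session = PipSession()

# Local path: read from disk and auto-decoded to text.
location, text = get_file_content("requirements.txt", session=session)

# http(s) URL: fetched through the session; the final URL after any
# redirects is returned as the location.
location, text = get_file_content(
    "https://example.org/requirements.txt", session=session)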
```python
def is_url(name):
    # type: (Union[str, Text]) -> bool
    """Returns true if the name looks like a URL"""
    if ':' not in name:
        return False
    scheme = name.split(':', 1)[0].lower()
    return scheme in ['http', 'https', 'file', 'ftp'] + vcs.all_schemes


def url_to_path(url):
    # type: (str) -> str
    """
    Convert a file: URL to a path.
    """
    assert url.startswith('file:'), (
        "You can only turn file: urls into filenames (not %r)" % url)

    _, netloc, path, _, _ = urllib_parse.urlsplit(url)

    if not netloc or netloc == 'localhost':
        # According to RFC 8089, same as empty authority.
        netloc = ''
    elif sys.platform == 'win32':
        # If we have a UNC path, prepend UNC share notation.
        netloc = '\\\\' + netloc
    else:
        raise ValueError(
            'non-local file URIs are not supported on this platform: %r'
            % url
        )

    path = urllib_request.url2pathname(netloc + path)
    return path


def is_archive_file(name):
    # type: (str) -> bool
    """Return True if `name` is considered an archive file."""
    ext = splitext(name)[1].lower()
    if ext in ARCHIVE_EXTENSIONS:
        return True
    return False
```
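
A few illustrative checks for the three helpers above (POSIX paths assumed; expected results noted in comments):

```python
from pip._internal.download import is_archive_file, is_url, url_to_path
from pip._internal.utils.misc import path_to_url

print(is_url("https://pypi.org/simple/"))   # True
print(is_url("./local/path"))               # False (no scheme)

# url_to_path() inverts path_to_url() for local files.
url = path_to_url("/tmp/pkg-1.0.tar.gz")    # 'file:///tmp/pkg-1.0.tar.gz'
print(url_to_path(url))                     # '/tmp/pkg-1.0.tar.gz'

print(is_archive_file("pkg-1.0.tar.gz"))    # True
print(is_archive_file("pkg-1.0.txt"))       # False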
```python
def unpack_vcs_link(link, location):
    vcs_backend = _get_used_vcs_backend(link)
    vcs_backend.unpack(location, url=link.url)


def _get_used_vcs_backend(link):
    # type: (Link) -> Optional[VersionControl]
    """
    Return a VersionControl object or None.
    """
    for vcs_backend in vcs.backends:
        if link.scheme in vcs_backend.schemes:
            return vcs_backend
    return None


def is_vcs_url(link):
    # type: (Link) -> bool
    return bool(_get_used_vcs_backend(link))


def is_file_url(link):
    # type: (Link) -> bool
    return link.url.lower().startswith('file:')


def is_dir_url(link):
    # type: (Link) -> bool
    """Return whether a file:// Link points to a directory.

    ``link`` must not have any other scheme but file://. Call is_file_url()
    first.

    """
    link_path = url_to_path(link.url_without_fragment)
    return os.path.isdir(link_path)


def _progress_indicator(iterable, *args, **kwargs):
    return iterable


def _download_url(
    resp,  # type: Response
    link,  # type: Link
    content_file,  # type: IO
    hashes,  # type: Optional[Hashes]
    progress_bar  # type: str
):
    # type: (...) -> None
    try:
        total_length = int(resp.headers['content-length'])
    except (ValueError, KeyError, TypeError):
        total_length = 0

    cached_resp = getattr(resp, "from_cache", False)
    if logger.getEffectiveLevel() > logging.INFO:
        show_progress = False
    elif cached_resp:
        show_progress = False
    elif total_length > (40 * 1000):
        show_progress = True
    elif not total_length:
        show_progress = True
    else:
        show_progress = False

    show_url = link.show_url

    def resp_read(chunk_size):
        try:
            # Special case for urllib3.
            for chunk in resp.raw.stream(
                    chunk_size,
                    # We use decode_content=False here because we don't
                    # want urllib3 to mess with the raw bytes we get
                    # from the server. If we decompress inside of
                    # urllib3 then we cannot verify the checksum
                    # because the checksum will be of the compressed
                    # file. This breakage will only occur if the
                    # server adds a Content-Encoding header, which
                    # depends on how the server was configured:
                    # - Some servers will notice that the file isn't a
                    #   compressible file and will leave the file alone
                    #   and with an empty Content-Encoding
                    # - Some servers will notice that the file is
                    #   already compressed and will leave the file
                    #   alone and will add a Content-Encoding: gzip
                    #   header
                    # - Some servers won't notice anything at all and
                    #   will take a file that's already been compressed
                    #   and compress it again and set the
                    #   Content-Encoding: gzip header
                    #
                    # By setting this not to decode automatically we
                    # hope to eliminate problems with the second case.
                    decode_content=False):
                yield chunk
        except AttributeError:
            # Standard file-like object.
            while True:
                chunk = resp.raw.read(chunk_size)
                if not chunk:
                    break
                yield chunk

    def written_chunks(chunks):
        for chunk in chunks:
            content_file.write(chunk)
            yield chunk

    progress_indicator = _progress_indicator

    if link.netloc == PyPI.netloc:
        url = show_url
    else:
        url = link.url_without_fragment

    if show_progress:  # We don't show progress on cached responses
        progress_indicator = DownloadProgressProvider(progress_bar,
                                                      max=total_length)
        if total_length:
            logger.info("Downloading %s (%s)", url, format_size(total_length))
        else:
            logger.info("Downloading %s", url)
    elif cached_resp:
        logger.info("Using cached %s", url)
    else:
        logger.info("Downloading %s", url)

    logger.debug('Downloading from URL %s', link)

    downloaded_chunks = written_chunks(
        progress_indicator(
            resp_read(CONTENT_CHUNK_SIZE),
            CONTENT_CHUNK_SIZE
        )
    )
    if hashes:
        hashes.check_against_chunks(downloaded_chunks)
    else:
        consume(downloaded_chunks)
```
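
The `decode_content=False` behavior that `_download_url` relies on can be reproduced with plain requests/urllib3. A minimal sketch (the URL and file name are placeholders) that hashes exactly the bytes received on the wire, which is the property the hash check above depends on:

```python
import hashlib

import requests

resp = requests.get("https://example.org/pkg-1.0.tar.gz", stream=True)
resp.raise_for_status()

sha = hashlib.sha256()
with open("pkg-1.0.tar.gz", "wb") as f:
    # decode_content=False keeps any Content-Encoding compression intact,
    # so the digest matches the file as stored on the server.
    for chunk in resp.raw.stream(8192, decode_content=False):
        sha.update(chunk)
        f.write(chunk)

print(sha.hexdigest())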
```python
def _copy_file(filename, location, link):
    copy = True
    download_location = os.path.join(location, link.filename)
    if os.path.exists(download_location):
        response = ask_path_exists(
            'The file %s exists. (i)gnore, (w)ipe, (b)ackup, (a)bort' %
            display_path(download_location), ('i', 'w', 'b', 'a'))
        if response == 'i':
            copy = False
        elif response == 'w':
            logger.warning('Deleting %s', display_path(download_location))
            os.remove(download_location)
        elif response == 'b':
            dest_file = backup_dir(download_location)
            logger.warning(
                'Backing up %s to %s',
                display_path(download_location),
                display_path(dest_file),
            )
            shutil.move(download_location, dest_file)
        elif response == 'a':
            sys.exit(-1)
    if copy:
        shutil.copy(filename, download_location)
        logger.info('Saved %s', display_path(download_location))


def unpack_http_url(
    link,  # type: Link
    location,  # type: str
    download_dir=None,  # type: Optional[str]
    session=None,  # type: Optional[PipSession]
    hashes=None,  # type: Optional[Hashes]
    progress_bar="on"  # type: str
):
    # type: (...) -> None
    if session is None:
        raise TypeError(
            "unpack_http_url() missing 1 required keyword argument: 'session'"
        )

    with TempDirectory(kind="unpack") as temp_dir:
        # If a download dir is specified, is the file already downloaded there?
        already_downloaded_path = None
        if download_dir:
            already_downloaded_path = _check_download_dir(link,
                                                          download_dir,
                                                          hashes)

        if already_downloaded_path:
            from_path = already_downloaded_path
            content_type = mimetypes.guess_type(from_path)[0]
        else:
            # let's download to a tmp dir
            from_path, content_type = _download_http_url(link,
                                                         session,
                                                         temp_dir.path,
                                                         hashes,
                                                         progress_bar)

        # unpack the archive to the build dir location. even when only
        # downloading archives, they have to be unpacked to parse dependencies
        unpack_file(from_path, location, content_type, link)

        # a download dir is specified; let's copy the archive there
        if download_dir and not already_downloaded_path:
            _copy_file(from_path, download_dir, link)

        if not already_downloaded_path:
            os.unlink(from_path)


def unpack_file_url(
    link,  # type: Link
    location,  # type: str
    download_dir=None,  # type: Optional[str]
    hashes=None  # type: Optional[Hashes]
):
    # type: (...) -> None
    """Unpack link into location.

    If download_dir is provided and link points to a file, make a copy
    of the link file inside download_dir.
    """
    link_path = url_to_path(link.url_without_fragment)

    # If it's a url to a local directory
    if is_dir_url(link):
        if os.path.isdir(location):
            rmtree(location)
        shutil.copytree(link_path, location, symlinks=True)
        if download_dir:
            logger.info('Link is a directory, ignoring download_dir')
        return

    # If --require-hashes is off, `hashes` is either empty, the
    # link's embedded hash, or MissingHashes; it is required to
    # match. If --require-hashes is on, we are satisfied by any
    # hash in `hashes` matching: a URL-based or an option-based
    # one; no internet-sourced hash will be in `hashes`.
    if hashes:
        hashes.check_against_path(link_path)

    # If a download dir is specified, is the file already there and valid?
    already_downloaded_path = None
    if download_dir:
        already_downloaded_path = _check_download_dir(link,
                                                      download_dir,
                                                      hashes)

    if already_downloaded_path:
        from_path = already_downloaded_path
    else:
        from_path = link_path

    content_type = mimetypes.guess_type(from_path)[0]

    # unpack the archive to the build dir location. even when only downloading
    # archives, they have to be unpacked to parse dependencies
    unpack_file(from_path, location, content_type, link)

    # a download dir is specified and not already downloaded
    if download_dir and not already_downloaded_path:
        _copy_file(from_path, download_dir, link)


class PipXmlrpcTransport(xmlrpc_client.Transport):
    """Provide a `xmlrpclib.Transport` implementation via a `PipSession`
    object.
    """

    def __init__(self, index_url, session, use_datetime=False):
        xmlrpc_client.Transport.__init__(self, use_datetime)
        index_parts = urllib_parse.urlparse(index_url)
        self._scheme = index_parts.scheme
        self._session = session

    def request(self, host, handler, request_body, verbose=False):
        parts = (self._scheme, host, handler, None, None, None)
        url = urllib_parse.urlunparse(parts)
        try:
            headers = {'Content-Type': 'text/xml'}
            response = self._session.post(url, data=request_body,
                                          headers=headers, stream=True)
            response.raise_for_status()
            self.verbose = verbose
            return self.parse_response(response.raw)
        except requests.HTTPError as exc:
            logger.critical(
                "HTTP error %s while getting %s",
                exc.response.status_code, url,
            )
            raise
```
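
For context, this transport was typically wired up roughly as follows (mirroring how pip's `search` command used it at the time; PyPI has since retired the XML-RPC search API, so treat the final call as illustrative only, and `PyPI.pypi_url` as an attribute of the index model in this tree):

```python
from pip._vendor.six.moves import xmlrpc_client

from pip._internal.download import PipSession, PipXmlrpcTransport
from pip._internal.models.index import PyPI

session = PipSession()
transport = PipXmlrpcTransport(PyPI.pypi_url, session)
pypi = xmlrpc_client.ServerProxy(PyPI.pypi_url, transport)

# Illustrative RPC call; it is routed through the PipSession above, so it
# inherits retries, proxies, and the pip user agent.
hits = pypi.search({'name': 'pip', 'summary': 'pip'}, 'or')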
```python
def unpack_url(
    link,  # type: Link
    location,  # type: str
    download_dir=None,  # type: Optional[str]
    only_download=False,  # type: bool
    session=None,  # type: Optional[PipSession]
    hashes=None,  # type: Optional[Hashes]
    progress_bar="on"  # type: str
):
    # type: (...) -> None
    """Unpack link.

    If link is a VCS link:
        if only_download, export into download_dir and ignore location
        else unpack into location
    for other types of link:
        - unpack into location
        - if download_dir, copy the file into download_dir
        - if only_download, mark location for deletion

    :param hashes: A Hashes object, one of whose embedded hashes must match,
        or HashMismatch will be raised. If the Hashes is empty, no matches are
        required, and unhashable types of requirements (like VCS ones, which
        would ordinarily raise HashUnsupported) are allowed.
    """
    # non-editable vcs urls
    if is_vcs_url(link):
        unpack_vcs_link(link, location)

    # file urls
    elif is_file_url(link):
        unpack_file_url(link, location, download_dir, hashes=hashes)

    # http urls
    else:
        if session is None:
            session = PipSession()

        unpack_http_url(
            link,
            location,
            download_dir,
            session,
            hashes=hashes,
            progress_bar=progress_bar
        )
    if only_download:
        write_delete_marker_file(location)
```
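
An illustrative call into the dispatcher above (the sdist URL is a placeholder with an elided path; an http(s) link routes through `unpack_http_url` with the given session):

```python
from pip._internal.download import PipSession, unpack_url
from pip._internal.models.link import Link

session = PipSession()

link = Link("https://files.pythonhosted.org/packages/.../pip-19.1.tar.gz")
unpack_url(
    link,
    location="/tmp/build/pip",      # where the archive is unpacked
    download_dir="/tmp/downloads",  # also keep a copy of the archive
    session=session,
)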
```python
def sanitize_content_filename(filename):
    # type: (str) -> str
    """
    Sanitize the "filename" value from a Content-Disposition header.
    """
    return os.path.basename(filename)


def parse_content_disposition(content_disposition, default_filename):
    # type: (str, str) -> str
    """
    Parse the "filename" value from a Content-Disposition header, and
    return the default filename if the result is empty.
    """
    _type, params = cgi.parse_header(content_disposition)
    filename = params.get('filename')
    if filename:
        # We need to sanitize the filename to prevent directory traversal
        # in case the filename contains ".." path parts.
        filename = sanitize_content_filename(filename)
    return filename or default_filename
```
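
Two quick examples of the sanitization guarantee, directly runnable against the functions above:

```python
from pip._internal.download import parse_content_disposition

# A hostile header cannot escape the download directory: only the base
# name of the advertised filename survives sanitization.
header = 'attachment; filename="../../../etc/passwd"'
print(parse_content_disposition(header, 'pkg.tar.gz'))  # 'passwd'

# An empty filename falls back to the default.
print(parse_content_disposition('attachment; filename=""', 'pkg.tar.gz'))
# 'pkg.tar.gz'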
```python
def _download_http_url(
    link,  # type: Link
    session,  # type: PipSession
    temp_dir,  # type: str
    hashes,  # type: Optional[Hashes]
    progress_bar  # type: str
):
    # type: (...) -> Tuple[str, str]
    """Download link url into temp_dir using provided session"""
    target_url = link.url.split('#', 1)[0]
    try:
        resp = session.get(
            target_url,
            # We use Accept-Encoding: identity here because requests
            # defaults to accepting compressed responses. This breaks in
            # a variety of ways depending on how the server is configured.
            # - Some servers will notice that the file isn't a compressible
            #   file and will leave the file alone and with an empty
            #   Content-Encoding
            # - Some servers will notice that the file is already
            #   compressed and will leave the file alone and will add a
            #   Content-Encoding: gzip header
            # - Some servers won't notice anything at all and will take
            #   a file that's already been compressed and compress it again
            #   and set the Content-Encoding: gzip header
            # By setting this to request only the identity encoding we're
            # hoping to eliminate the third case. Hopefully there does not
            # exist a server which when given a file will notice it is
            # already compressed and that you're not asking for a
            # compressed file and will then decompress it before sending
            # because if that's the case I don't think it'll ever be
            # possible to make this work.
            headers={"Accept-Encoding": "identity"},
            stream=True,
        )
        resp.raise_for_status()
    except requests.HTTPError as exc:
        logger.critical(
            "HTTP error %s while getting %s", exc.response.status_code, link,
        )
        raise

    content_type = resp.headers.get('content-type', '')
    filename = link.filename  # fallback
    # Have a look at the Content-Disposition header for a better guess
    content_disposition = resp.headers.get('content-disposition')
    if content_disposition:
        filename = parse_content_disposition(content_disposition, filename)
    ext = splitext(filename)[1]  # type: Optional[str]
    if not ext:
        ext = mimetypes.guess_extension(content_type)
        if ext:
            filename += ext
    if not ext and link.url != resp.url:
        ext = os.path.splitext(resp.url)[1]
        if ext:
            filename += ext
    file_path = os.path.join(temp_dir, filename)
    with open(file_path, 'wb') as content_file:
        _download_url(resp, link, content_file, hashes, progress_bar)
    return file_path, content_type


def _check_download_dir(link, download_dir, hashes):
    # type: (Link, str, Optional[Hashes]) -> Optional[str]
    """Check download_dir for a previously downloaded file with the correct
    hash. If such a file is found, return its path; otherwise return None.
    """
    download_path = os.path.join(download_dir, link.filename)
    if os.path.exists(download_path):
        # If already downloaded, does its hash match?
        logger.info('File was already downloaded %s', download_path)
        if hashes:
            try:
                hashes.check_against_path(download_path)
            except HashMismatch:
                logger.warning(
                    'Previously-downloaded file %s has bad hash. '
                    'Re-downloading.',
                    download_path
                )
                os.unlink(download_path)
                return None
        return download_path
    return None
```
