Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/pip/_internal/index.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:32:28 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:d30785e31577 | 1:56ad4e20f292 |
|---|---|
| 1 """Routines related to PyPI, indexes""" | |
| 2 from __future__ import absolute_import | |
| 3 | |
| 4 import cgi | |
| 5 import itertools | |
| 6 import logging | |
| 7 import mimetypes | |
| 8 import os | |
| 9 import re | |
| 10 | |
| 11 from pip._vendor import html5lib, requests, six | |
| 12 from pip._vendor.distlib.compat import unescape | |
| 13 from pip._vendor.packaging import specifiers | |
| 14 from pip._vendor.packaging.utils import canonicalize_name | |
| 15 from pip._vendor.packaging.version import parse as parse_version | |
| 16 from pip._vendor.requests.exceptions import HTTPError, RetryError, SSLError | |
| 17 from pip._vendor.six.moves.urllib import parse as urllib_parse | |
| 18 from pip._vendor.six.moves.urllib import request as urllib_request | |
| 19 | |
| 20 from pip._internal.download import is_url, url_to_path | |
| 21 from pip._internal.exceptions import ( | |
| 22 BestVersionAlreadyInstalled, DistributionNotFound, InvalidWheelFilename, | |
| 23 UnsupportedWheel, | |
| 24 ) | |
| 25 from pip._internal.models.candidate import InstallationCandidate | |
| 26 from pip._internal.models.format_control import FormatControl | |
| 27 from pip._internal.models.link import Link | |
| 28 from pip._internal.models.selection_prefs import SelectionPreferences | |
| 29 from pip._internal.models.target_python import TargetPython | |
| 30 from pip._internal.utils.compat import ipaddress | |
| 31 from pip._internal.utils.logging import indent_log | |
| 32 from pip._internal.utils.misc import ( | |
| 33 ARCHIVE_EXTENSIONS, SUPPORTED_EXTENSIONS, WHEEL_EXTENSION, path_to_url, | |
| 34 redact_password_from_url, | |
| 35 ) | |
| 36 from pip._internal.utils.packaging import check_requires_python | |
| 37 from pip._internal.utils.typing import MYPY_CHECK_RUNNING | |
| 38 from pip._internal.wheel import Wheel | |
| 39 | |
| 40 if MYPY_CHECK_RUNNING: | |
| 41 from logging import Logger | |
| 42 from typing import ( | |
| 43 Any, Callable, FrozenSet, Iterable, Iterator, List, MutableMapping, | |
| 44 Optional, Sequence, Set, Text, Tuple, Union, | |
| 45 ) | |
| 46 import xml.etree.ElementTree | |
| 47 from pip._vendor.packaging.version import _BaseVersion | |
| 48 from pip._vendor.requests import Response | |
| 49 from pip._internal.models.search_scope import SearchScope | |
| 50 from pip._internal.req import InstallRequirement | |
| 51 from pip._internal.download import PipSession | |
| 52 from pip._internal.pep425tags import Pep425Tag | |
| 53 from pip._internal.utils.hashes import Hashes | |
| 54 | |
| 55 BuildTag = Tuple[Any, ...] # either empty tuple or Tuple[int, str] | |
| 56 CandidateSortingKey = ( | |
| 57 Tuple[int, int, int, _BaseVersion, BuildTag, Optional[int]] | |
| 58 ) | |
| 59 HTMLElement = xml.etree.ElementTree.Element | |
| 60 SecureOrigin = Tuple[str, str, Optional[str]] | |
| 61 | |
| 62 | |
# Public API of this module.
__all__ = ['FormatControl', 'FoundCandidates', 'PackageFinder']


# Origins (protocol, hostname, port) that pip trusts without HTTPS.
# "*" is a wildcard; a None port means "no port component allowed".
SECURE_ORIGINS = [
    # protocol, hostname, port
    # Taken from Chrome's list of secure origins (See: http://bit.ly/1qrySKC)
    ("https", "*", "*"),
    ("*", "localhost", "*"),
    ("*", "127.0.0.0/8", "*"),
    ("*", "::1/128", "*"),
    ("file", "*", None),
    # ssh is always secure.
    ("ssh", "*", "*"),
]  # type: List[SecureOrigin]


# Module-level logger; handlers/levels are configured by the application.
logger = logging.getLogger(__name__)
| 80 | |
| 81 | |
def _match_vcs_scheme(url):
    # type: (str) -> Optional[str]
    """Look for VCS schemes in the URL.

    Returns the matched VCS scheme, or None if there's no match.
    """
    from pip._internal.vcs import vcs
    for scheme in vcs.schemes:
        # Use a slice for the character after the scheme: indexing with
        # url[len(scheme)] raises IndexError when the URL is exactly the
        # scheme with nothing following it (e.g. url == "git").
        if url.lower().startswith(scheme) and \
                url[len(scheme):len(scheme) + 1] in ('+', ':'):
            return scheme
    return None
| 93 | |
| 94 | |
def _is_url_like_archive(url):
    # type: (str) -> bool
    """Return whether the URL's filename ends in a known archive extension."""
    filename = Link(url).filename
    return any(filename.endswith(ext) for ext in ARCHIVE_EXTENSIONS)
| 104 | |
| 105 | |
| 106 class _NotHTML(Exception): | |
| 107 def __init__(self, content_type, request_desc): | |
| 108 # type: (str, str) -> None | |
| 109 super(_NotHTML, self).__init__(content_type, request_desc) | |
| 110 self.content_type = content_type | |
| 111 self.request_desc = request_desc | |
| 112 | |
| 113 | |
| 114 def _ensure_html_header(response): | |
| 115 # type: (Response) -> None | |
| 116 """Check the Content-Type header to ensure the response contains HTML. | |
| 117 | |
| 118 Raises `_NotHTML` if the content type is not text/html. | |
| 119 """ | |
| 120 content_type = response.headers.get("Content-Type", "") | |
| 121 if not content_type.lower().startswith("text/html"): | |
| 122 raise _NotHTML(content_type, response.request.method) | |
| 123 | |
| 124 | |
class _NotHTTP(Exception):
    """Raised when a URL's scheme is not http/https, so it cannot be
    probed with a HEAD request."""
    pass
| 127 | |
| 128 | |
def _ensure_html_response(url, session):
    # type: (str, PipSession) -> None
    """Send a HEAD request to the URL, and ensure the response contains HTML.

    Raises `_NotHTTP` if the URL is not available for a HEAD request, or
    `_NotHTML` if the content type is not text/html.
    """
    if urllib_parse.urlsplit(url).scheme not in {'http', 'https'}:
        raise _NotHTTP()

    head_resp = session.head(url, allow_redirects=True)
    head_resp.raise_for_status()
    _ensure_html_header(head_resp)
| 144 | |
| 145 | |
def _get_html_response(url, session):
    # type: (str, PipSession) -> Response
    """GET an HTML page and return the response.

    Steps:

    1. If the URL looks like an archive, probe it with HEAD first so a
       large file isn't downloaded just to learn it isn't HTML. This may
       raise `_NotHTTP` (scheme unsuitable for HEAD) or `_NotHTML`.
    2. Perform the GET itself; network failures raise HTTP exceptions.
    3. Double-check the Content-Type of what actually came back, raising
       `_NotHTML` if it is not HTML.
    """
    if _is_url_like_archive(url):
        _ensure_html_response(url, session=session)

    logger.debug('Getting page %s', redact_password_from_url(url))

    # max-age=0 rather than no-cache: /simple/ pages must be revalidated
    # on every install (so "twine upload && pip install" works even right
    # after a previous install), but conditional GETs remain possible, so
    # unchanged pages still cost only the revalidation round trip.
    # See pypa/pip#5670 for background.
    request_headers = {
        "Accept": "text/html",
        "Cache-Control": "max-age=0",
    }
    response = session.get(url, headers=request_headers)
    response.raise_for_status()

    # The HEAD probe above only fires when the URL *ends* like an archive,
    # which is not a requirement of URLs -- so verify the Content-Type of
    # the downloaded body as well.
    _ensure_html_header(response)

    return response
| 195 | |
| 196 | |
| 197 def _handle_get_page_fail( | |
| 198 link, # type: Link | |
| 199 reason, # type: Union[str, Exception] | |
| 200 meth=None # type: Optional[Callable[..., None]] | |
| 201 ): | |
| 202 # type: (...) -> None | |
| 203 if meth is None: | |
| 204 meth = logger.debug | |
| 205 meth("Could not fetch URL %s: %s - skipping", link, reason) | |
| 206 | |
| 207 | |
def _get_html_page(link, session=None):
    # type: (Link, Optional[PipSession]) -> Optional[HTMLPage]
    # Fetch `link` over the network and wrap the result as an HTMLPage.
    # Returns None for anything that can't be treated as an HTML index
    # page (VCS URLs, non-HTML content, network errors), logging the
    # reason instead of raising.
    if session is None:
        raise TypeError(
            "_get_html_page() missing 1 required keyword argument: 'session'"
        )

    # Strip any #fragment before fetching.
    url = link.url.split('#', 1)[0]

    # Check for VCS schemes that do not support lookup as web pages.
    vcs_scheme = _match_vcs_scheme(url)
    if vcs_scheme:
        logger.debug('Cannot look at %s URL %s', vcs_scheme, link)
        return None

    # Tack index.html onto file:// URLs that point to directories
    scheme, _, path, _, _, _ = urllib_parse.urlparse(url)
    if (scheme == 'file' and os.path.isdir(urllib_request.url2pathname(path))):
        # add trailing slash if not present so urljoin doesn't trim
        # final segment
        if not url.endswith('/'):
            url += '/'
        url = urllib_parse.urljoin(url, 'index.html')
        logger.debug(' file: URL is directory, getting %s', url)

    try:
        resp = _get_html_response(url, session=session)
    except _NotHTTP:
        logger.debug(
            'Skipping page %s because it looks like an archive, and cannot '
            'be checked by HEAD.', link,
        )
    except _NotHTML as exc:
        logger.debug(
            'Skipping page %s because the %s request got Content-Type: %s',
            link, exc.request_desc, exc.content_type,
        )
    except HTTPError as exc:
        _handle_get_page_fail(link, exc)
    except RetryError as exc:
        _handle_get_page_fail(link, exc)
    except SSLError as exc:
        reason = "There was a problem confirming the ssl certificate: "
        reason += str(exc)
        _handle_get_page_fail(link, reason, meth=logger.info)
    except requests.ConnectionError as exc:
        _handle_get_page_fail(link, "connection error: %s" % exc)
    except requests.Timeout:
        _handle_get_page_fail(link, "timed out")
    else:
        return HTMLPage(resp.content, resp.url, resp.headers)
    # Every handled exception above falls through to the None return.
    return None
| 260 | |
| 261 | |
def _check_link_requires_python(
    link,  # type: Link
    version_info,  # type: Tuple[int, int, int]
    ignore_requires_python=False,  # type: bool
):
    # type: (...) -> bool
    """
    Return whether the given Python version is compatible with a link's
    "Requires-Python" value.

    :param version_info: A 3-tuple of ints representing the Python
        major-minor-micro version to check.
    :param ignore_requires_python: Whether to ignore the "Requires-Python"
        value if the given Python version isn't compatible.
    """
    try:
        is_compatible = check_requires_python(
            link.requires_python, version_info=version_info,
        )
    except specifiers.InvalidSpecifier:
        # An unparseable specifier is logged and treated as compatible.
        logger.debug(
            "Ignoring invalid Requires-Python (%r) for link: %s",
            link.requires_python, link,
        )
        return True

    if is_compatible:
        return True

    version = '.'.join(map(str, version_info))
    if not ignore_requires_python:
        logger.debug(
            'Link requires a different Python (%s not in: %r): %s',
            version, link.requires_python, link,
        )
        return False

    # Incompatible, but the caller asked to ignore Requires-Python.
    logger.debug(
        'Ignoring failed Requires-Python check (%s not in: %r) '
        'for link: %s',
        version, link.requires_python, link,
    )
    return True
| 303 | |
| 304 | |
class LinkEvaluator(object):

    """
    Responsible for evaluating links for a particular project.
    """

    # Matches a trailing "-pyX[.Y]" marker in a version fragment.
    _py_version_re = re.compile(r'-py([123]\.?[0-9]?)$')

    # Don't include an allow_yanked default value to make sure each call
    # site considers whether yanked releases are allowed. This also causes
    # that decision to be made explicit in the calling code, which helps
    # people when reading the code.
    def __init__(
        self,
        project_name,  # type: str
        canonical_name,  # type: str
        formats,  # type: FrozenSet
        target_python,  # type: TargetPython
        allow_yanked,  # type: bool
        ignore_requires_python=None,  # type: Optional[bool]
    ):
        # type: (...) -> None
        """
        :param project_name: The user supplied package name.
        :param canonical_name: The canonical package name.
        :param formats: The formats allowed for this package. Should be a set
            with 'binary' or 'source' or both in it.
        :param target_python: The target Python interpreter to use when
            evaluating link compatibility. This is used, for example, to
            check wheel compatibility, as well as when checking the Python
            version, e.g. the Python version embedded in a link filename
            (or egg fragment) and against an HTML link's optional PEP 503
            "data-requires-python" attribute.
        :param allow_yanked: Whether files marked as yanked (in the sense
            of PEP 592) are permitted to be candidates for install.
        :param ignore_requires_python: Whether to ignore incompatible
            PEP 503 "data-requires-python" values in HTML links. Defaults
            to False.
        """
        if ignore_requires_python is None:
            ignore_requires_python = False

        self._allow_yanked = allow_yanked
        self._canonical_name = canonical_name
        self._ignore_requires_python = ignore_requires_python
        self._formats = formats
        self._target_python = target_python

        self.project_name = project_name

    def evaluate_link(self, link):
        # type: (Link) -> Tuple[bool, Optional[Text]]
        """
        Determine whether a link is a candidate for installation.

        :return: A tuple (is_candidate, result), where `result` is (1) a
            version string if `is_candidate` is True, and (2) if
            `is_candidate` is False, an optional string to log the reason
            the link fails to qualify.
        """
        version = None
        if link.is_yanked and not self._allow_yanked:
            reason = link.yanked_reason or '<none given>'
            # Mark this as a unicode string to prevent "UnicodeEncodeError:
            # 'ascii' codec can't encode character" in Python 2 when
            # the reason contains non-ascii characters.
            return (False, u'yanked for reason: {}'.format(reason))

        if link.egg_fragment:
            # An #egg= fragment names the project directly; trust it.
            egg_info = link.egg_fragment
            ext = link.ext
        else:
            egg_info, ext = link.splitext()
            if not ext:
                return (False, 'not a file')
            if ext not in SUPPORTED_EXTENSIONS:
                return (False, 'unsupported archive format: %s' % ext)
            if "binary" not in self._formats and ext == WHEEL_EXTENSION:
                reason = 'No binaries permitted for %s' % self.project_name
                return (False, reason)
            if "macosx10" in link.path and ext == '.zip':
                return (False, 'macosx10 one')
            if ext == WHEEL_EXTENSION:
                try:
                    wheel = Wheel(link.filename)
                except InvalidWheelFilename:
                    return (False, 'invalid wheel filename')
                if canonicalize_name(wheel.name) != self._canonical_name:
                    reason = 'wrong project name (not %s)' % self.project_name
                    return (False, reason)

                supported_tags = self._target_python.get_tags()
                if not wheel.supported(supported_tags):
                    # Include the wheel's tags in the reason string to
                    # simplify troubleshooting compatibility issues.
                    file_tags = wheel.get_formatted_file_tags()
                    reason = (
                        "none of the wheel's tags match: {}".format(
                            ', '.join(file_tags)
                        )
                    )
                    return (False, reason)

                version = wheel.version

        # This should be up by the self.ok_binary check, but see issue 2700.
        if "source" not in self._formats and ext != WHEEL_EXTENSION:
            return (False, 'No sources permitted for %s' % self.project_name)

        if not version:
            version = _extract_version_from_fragment(
                egg_info, self._canonical_name,
            )
        if not version:
            return (
                False, 'Missing project version for %s' % self.project_name,
            )

        # Strip any trailing "-pyX.Y" marker and check it against the
        # target interpreter's version.
        match = self._py_version_re.search(version)
        if match:
            version = version[:match.start()]
            py_version = match.group(1)
            if py_version != self._target_python.py_version:
                return (False, 'Python version is incorrect')

        supports_python = _check_link_requires_python(
            link, version_info=self._target_python.py_version_info,
            ignore_requires_python=self._ignore_requires_python,
        )
        if not supports_python:
            # Return None for the reason text to suppress calling
            # _log_skipped_link().
            return (False, None)

        logger.debug('Found link %s, version: %s', link, version)

        return (True, version)
| 442 | |
| 443 | |
def filter_unallowed_hashes(
    candidates,  # type: List[InstallationCandidate]
    hashes,  # type: Hashes
    project_name,  # type: str
):
    # type: (...) -> List[InstallationCandidate]
    """
    Return a new list of candidates, dropping those whose hashes aren't
    allowed.

    If at least one candidate matches an allowed hash, the result keeps
    every candidate that either matches or carries no hash at all; keeping
    the hashless ones allows a warning to be logged if a more-preferred
    hashless candidate exists. If nothing matches, all candidates are
    returned unchanged, which lets pip report the hash of the candidate
    that would otherwise have been installed (so the user can update their
    requirements file with the desired hash).
    """
    if not hashes:
        logger.debug(
            'Given no hashes to check %s links for project %r: '
            'discarding no candidates',
            len(candidates),
            project_name,
        )
        # Hand back a fresh list, never the caller's own object.
        return list(candidates)

    matches_or_no_digest = []
    non_matches = []  # collected purely for the log message below
    match_count = 0
    for candidate in candidates:
        candidate_link = candidate.link
        if candidate_link.has_hash:
            if candidate_link.is_hash_allowed(hashes=hashes):
                match_count += 1
            else:
                non_matches.append(candidate)
                continue
        # Hashless candidates always survive this pass.
        matches_or_no_digest.append(candidate)

    # Hand back a fresh list, never the caller's own object.
    filtered = matches_or_no_digest if match_count else list(candidates)

    if len(filtered) == len(candidates):
        discard_message = 'discarding no candidates'
    else:
        discard_message = 'discarding {} non-matches:\n {}'.format(
            len(non_matches),
            '\n '.join(str(candidate.link) for candidate in non_matches)
        )

    logger.debug(
        'Checked %s links for project %r against %s hashes '
        '(%s matches, %s no digest): %s',
        len(candidates),
        project_name,
        hashes.digest_count,
        match_count,
        len(matches_or_no_digest) - match_count,
        discard_message
    )

    return filtered
| 517 | |
| 518 | |
class CandidatePreferences(object):

    """
    Encapsulates some of the preferences for filtering and sorting
    InstallationCandidate objects.
    """

    def __init__(
        self,
        prefer_binary=False,  # type: bool
        allow_all_prereleases=False,  # type: bool
    ):
        # type: (...) -> None
        """
        :param prefer_binary: Whether to sort wheels above source archives.
        :param allow_all_prereleases: Whether to allow all pre-releases.
        """
        self.prefer_binary = prefer_binary
        self.allow_all_prereleases = allow_all_prereleases
| 537 | |
| 538 | |
class CandidateEvaluator(object):

    """
    Responsible for filtering and sorting candidates for installation based
    on what tags are valid.
    """

    @classmethod
    def create(
        cls,
        project_name,  # type: str
        target_python=None,  # type: Optional[TargetPython]
        prefer_binary=False,  # type: bool
        allow_all_prereleases=False,  # type: bool
        specifier=None,  # type: Optional[specifiers.BaseSpecifier]
        hashes=None,  # type: Optional[Hashes]
    ):
        # type: (...) -> CandidateEvaluator
        """Create a CandidateEvaluator object.

        :param target_python: The target Python interpreter to use when
            checking compatibility. If None (the default), a TargetPython
            object will be constructed from the running Python.
        :param hashes: An optional collection of allowed hashes.
        """
        if target_python is None:
            target_python = TargetPython()
        if specifier is None:
            # An empty SpecifierSet matches every version.
            specifier = specifiers.SpecifierSet()

        supported_tags = target_python.get_tags()

        return cls(
            project_name=project_name,
            supported_tags=supported_tags,
            specifier=specifier,
            prefer_binary=prefer_binary,
            allow_all_prereleases=allow_all_prereleases,
            hashes=hashes,
        )

    def __init__(
        self,
        project_name,  # type: str
        supported_tags,  # type: List[Pep425Tag]
        specifier,  # type: specifiers.BaseSpecifier
        prefer_binary=False,  # type: bool
        allow_all_prereleases=False,  # type: bool
        hashes=None,  # type: Optional[Hashes]
    ):
        # type: (...) -> None
        """
        :param supported_tags: The PEP 425 tags supported by the target
            Python in order of preference (most preferred first).
        """
        self._allow_all_prereleases = allow_all_prereleases
        self._hashes = hashes
        self._prefer_binary = prefer_binary
        self._project_name = project_name
        self._specifier = specifier
        self._supported_tags = supported_tags

    def get_applicable_candidates(
        self,
        candidates,  # type: List[InstallationCandidate]
    ):
        # type: (...) -> List[InstallationCandidate]
        """
        Return the applicable candidates from a list of candidates.
        """
        # Using None infers from the specifier instead.
        allow_prereleases = self._allow_all_prereleases or None
        specifier = self._specifier
        versions = {
            str(v) for v in specifier.filter(
                # We turn the version object into a str here because otherwise
                # when we're debundled but setuptools isn't, Python will see
                # packaging.version.Version and
                # pkg_resources._vendor.packaging.version.Version as different
                # types. This way we'll use a str as a common data interchange
                # format. If we stop using the pkg_resources provided specifier
                # and start using our own, we can drop the cast to str().
                (str(c.version) for c in candidates),
                prereleases=allow_prereleases,
            )
        }

        # Again, converting version to str to deal with debundling.
        applicable_candidates = [
            c for c in candidates if str(c.version) in versions
        ]

        # Finally, drop candidates whose hashes are disallowed.
        return filter_unallowed_hashes(
            candidates=applicable_candidates,
            hashes=self._hashes,
            project_name=self._project_name,
        )

    def make_found_candidates(
        self,
        candidates,  # type: List[InstallationCandidate]
    ):
        # type: (...) -> FoundCandidates
        """
        Create and return a `FoundCandidates` instance.

        :param specifier: An optional object implementing `filter`
            (e.g. `packaging.specifiers.SpecifierSet`) to filter applicable
            versions.
        """
        applicable_candidates = self.get_applicable_candidates(candidates)

        return FoundCandidates(
            candidates,
            applicable_candidates=applicable_candidates,
            evaluator=self,
        )

    def _sort_key(self, candidate):
        # type: (InstallationCandidate) -> CandidateSortingKey
        """
        Function to pass as the `key` argument to a call to sorted() to sort
        InstallationCandidates by preference.

        Returns a tuple such that tuples sorting as greater using Python's
        default comparison operator are more preferred.

        The preference is as follows:

        First and foremost, candidates with allowed (matching) hashes are
        always preferred over candidates without matching hashes. This is
        because e.g. if the only candidate with an allowed hash is yanked,
        we still want to use that candidate.

        Second, excepting hash considerations, candidates that have been
        yanked (in the sense of PEP 592) are always less preferred than
        candidates that haven't been yanked. Then:

        If not finding wheels, they are sorted by version only.
        If finding wheels, then the sort order is by version, then:
          1. existing installs
          2. wheels ordered via Wheel.support_index_min(self._supported_tags)
          3. source archives
        If prefer_binary was set, then all wheels are sorted above sources.

        Note: it was considered to embed this logic into the Link
              comparison operators, but then different sdist links
              with the same version, would have to be considered equal
        """
        valid_tags = self._supported_tags
        support_num = len(valid_tags)
        build_tag = tuple()  # type: BuildTag
        binary_preference = 0
        link = candidate.link
        if link.is_wheel:
            # can raise InvalidWheelFilename
            wheel = Wheel(link.filename)
            if not wheel.supported(valid_tags):
                raise UnsupportedWheel(
                    "%s is not a supported wheel for this platform. It "
                    "can't be sorted." % wheel.filename
                )
            if self._prefer_binary:
                binary_preference = 1
            # Negated so that a better (smaller) support index sorts greater.
            pri = -(wheel.support_index_min(valid_tags))
            if wheel.build_tag is not None:
                match = re.match(r'^(\d+)(.*)$', wheel.build_tag)
                build_tag_groups = match.groups()
                build_tag = (int(build_tag_groups[0]), build_tag_groups[1])
        else:  # sdist
            pri = -(support_num)
        has_allowed_hash = int(link.is_hash_allowed(self._hashes))
        yank_value = -1 * int(link.is_yanked)  # -1 for yanked.
        return (
            has_allowed_hash, yank_value, binary_preference, candidate.version,
            build_tag, pri,
        )

    def get_best_candidate(
        self,
        candidates,  # type: List[InstallationCandidate]
    ):
        # type: (...) -> Optional[InstallationCandidate]
        """
        Return the best candidate per the instance's sort order, or None if
        no candidate is acceptable.
        """
        if not candidates:
            return None

        best_candidate = max(candidates, key=self._sort_key)

        # Log a warning per PEP 592 if necessary before returning.
        link = best_candidate.link
        if link.is_yanked:
            reason = link.yanked_reason or '<none given>'
            msg = (
                # Mark this as a unicode string to prevent
                # "UnicodeEncodeError: 'ascii' codec can't encode character"
                # in Python 2 when the reason contains non-ascii characters.
                u'The candidate selected for download or install is a '
                'yanked version: {candidate}\n'
                'Reason for being yanked: {reason}'
            ).format(candidate=best_candidate, reason=reason)
            logger.warning(msg)

        return best_candidate
| 746 | |
| 747 | |
class FoundCandidates(object):
    """A collection of candidates, returned by `PackageFinder.find_candidates`.

    This class is only intended to be instantiated by CandidateEvaluator's
    `make_found_candidates()` method.
    """

    def __init__(
        self,
        candidates,  # type: List[InstallationCandidate]
        applicable_candidates,  # type: List[InstallationCandidate]
        evaluator,  # type: CandidateEvaluator
    ):
        # type: (...) -> None
        """
        :param candidates: A sequence of all available candidates found.
        :param applicable_candidates: The applicable candidates.
        :param evaluator: A CandidateEvaluator object to sort applicable
            candidates by order of preference.
        """
        self._candidates = candidates
        self._evaluator = evaluator
        self._applicable_candidates = applicable_candidates

    def iter_all(self):
        # type: () -> Iterable[InstallationCandidate]
        """Iterate through all candidates."""
        return iter(self._candidates)

    def iter_applicable(self):
        # type: () -> Iterable[InstallationCandidate]
        """Iterate through the applicable candidates."""
        return iter(self._applicable_candidates)

    def get_best(self):
        # type: () -> Optional[InstallationCandidate]
        """Return the best candidate available, or None if no applicable
        candidates are found.
        """
        # Delegate the actual ranking to the evaluator.
        applicable = list(self.iter_applicable())
        return self._evaluator.get_best_candidate(applicable)
| 791 | |
| 792 | |
class PackageFinder(object):
    """This finds packages.

    This is meant to match easy_install's technique for looking for
    packages, by reading pages and looking for appropriate links.
    """

    def __init__(
        self,
        search_scope,  # type: SearchScope
        session,  # type: PipSession
        target_python,  # type: TargetPython
        allow_yanked,  # type: bool
        format_control=None,  # type: Optional[FormatControl]
        trusted_hosts=None,  # type: Optional[List[str]]
        candidate_prefs=None,  # type: CandidatePreferences
        ignore_requires_python=None,  # type: Optional[bool]
    ):
        # type: (...) -> None
        """
        This constructor is primarily meant to be used by the create() class
        method and from tests.

        :param search_scope: A SearchScope providing the index URLs and
            find-links locations to search.
        :param session: The Session to use to make requests.
        :param target_python: The target Python interpreter used when
            evaluating link compatibility.
        :param allow_yanked: Whether yanked files may be considered as
            candidates.
        :param format_control: A FormatControl object, used to control
            the selection of source packages / binary packages when consulting
            the index and links.
        :param trusted_hosts: Hosts allowed over insecure transports.
        :param candidate_prefs: Options to use when creating a
            CandidateEvaluator object.
        :param ignore_requires_python: Whether to ignore Requires-Python
            metadata when evaluating links.
        """
        if trusted_hosts is None:
            trusted_hosts = []
        if candidate_prefs is None:
            candidate_prefs = CandidatePreferences()

        format_control = format_control or FormatControl(set(), set())

        self._allow_yanked = allow_yanked
        self._candidate_prefs = candidate_prefs
        self._ignore_requires_python = ignore_requires_python
        self._target_python = target_python

        self.search_scope = search_scope
        self.session = session
        self.format_control = format_control
        self.trusted_hosts = trusted_hosts

        # These are boring links that have already been logged somehow.
        self._logged_links = set()  # type: Set[Link]

    # Don't include an allow_yanked default value to make sure each call
    # site considers whether yanked releases are allowed. This also causes
    # that decision to be made explicit in the calling code, which helps
    # people when reading the code.
    @classmethod
    def create(
        cls,
        search_scope,  # type: SearchScope
        selection_prefs,  # type: SelectionPreferences
        trusted_hosts=None,  # type: Optional[List[str]]
        session=None,  # type: Optional[PipSession]
        target_python=None,  # type: Optional[TargetPython]
    ):
        # type: (...) -> PackageFinder
        """Create a PackageFinder.

        :param selection_prefs: The candidate selection preferences, as a
            SelectionPreferences object.
        :param trusted_hosts: Domains not to emit warnings for when not using
            HTTPS.
        :param session: The Session to use to make requests.
        :param target_python: The target Python interpreter to use when
            checking compatibility. If None (the default), a TargetPython
            object will be constructed from the running Python.
        :raises TypeError: If no session is passed (kept keyword-optional in
            the signature, but required in practice).
        """
        if session is None:
            raise TypeError(
                "PackageFinder.create() missing 1 required keyword argument: "
                "'session'"
            )
        if target_python is None:
            target_python = TargetPython()

        candidate_prefs = CandidatePreferences(
            prefer_binary=selection_prefs.prefer_binary,
            allow_all_prereleases=selection_prefs.allow_all_prereleases,
        )

        return cls(
            candidate_prefs=candidate_prefs,
            search_scope=search_scope,
            session=session,
            target_python=target_python,
            allow_yanked=selection_prefs.allow_yanked,
            format_control=selection_prefs.format_control,
            trusted_hosts=trusted_hosts,
            ignore_requires_python=selection_prefs.ignore_requires_python,
        )

    @property
    def find_links(self):
        # type: () -> List[str]
        """The find-links locations from the search scope."""
        return self.search_scope.find_links

    @property
    def index_urls(self):
        # type: () -> List[str]
        """The index URLs from the search scope."""
        return self.search_scope.index_urls

    @property
    def allow_all_prereleases(self):
        # type: () -> bool
        """Whether pre-release versions are considered for all projects."""
        return self._candidate_prefs.allow_all_prereleases

    def set_allow_all_prereleases(self):
        # type: () -> None
        """Enable consideration of pre-release versions for all projects."""
        self._candidate_prefs.allow_all_prereleases = True

    def add_trusted_host(self, host, source=None):
        # type: (str, Optional[str]) -> None
        """Register a host as trusted for insecure (non-HTTPS) access.

        :param source: An optional source string, for logging where the host
            string came from.
        """
        # It is okay to add a previously added host because PipSession stores
        # the resulting prefixes in a dict.
        msg = 'adding trusted host: {!r}'.format(host)
        if source is not None:
            msg += ' (from {})'.format(source)
        logger.info(msg)
        self.session.add_insecure_host(host)
        if host in self.trusted_hosts:
            return

        self.trusted_hosts.append(host)

    def iter_secure_origins(self):
        # type: () -> Iterator[SecureOrigin]
        """Yield hard-coded secure origins, then a wildcard (scheme, host,
        port) triple for each user-configured trusted host.
        """
        for secure_origin in SECURE_ORIGINS:
            yield secure_origin
        for host in self.trusted_hosts:
            yield ('*', host, '*')

    @staticmethod
    def _sort_locations(locations, expand_dir=False):
        # type: (Sequence[str], bool) -> Tuple[List[str], List[str]]
        """
        Sort locations into "files" (archives) and "urls", and return
        a pair of lists (files, urls).

        :param locations: Paths and/or URLs to classify.
        :param expand_dir: When True, local directories are expanded and
            each contained entry is classified individually.
        """
        files = []
        urls = []

        # puts the url for the given file path into the appropriate list
        def sort_path(path):
            url = path_to_url(path)
            # Anything served as text/html is treated as an index page to
            # scrape for links; everything else is a direct file candidate.
            if mimetypes.guess_type(url, strict=False)[0] == 'text/html':
                urls.append(url)
            else:
                files.append(url)

        for url in locations:

            is_local_path = os.path.exists(url)
            is_file_url = url.startswith('file:')

            if is_local_path or is_file_url:
                if is_local_path:
                    path = url
                else:
                    path = url_to_path(url)
                if os.path.isdir(path):
                    if expand_dir:
                        path = os.path.realpath(path)
                        for item in os.listdir(path):
                            sort_path(os.path.join(path, item))
                    elif is_file_url:
                        urls.append(url)
                    else:
                        logger.warning(
                            "Path '{0}' is ignored: "
                            "it is a directory.".format(path),
                        )
                elif os.path.isfile(path):
                    sort_path(path)
                else:
                    logger.warning(
                        "Url '%s' is ignored: it is neither a file "
                        "nor a directory.", url,
                    )
            elif is_url(url):
                # Only add url with clear scheme
                urls.append(url)
            else:
                logger.warning(
                    "Url '%s' is ignored. It is either a non-existing "
                    "path or lacks a specific scheme.", url,
                )

        return files, urls

    def _validate_secure_origin(self, logger, location):
        # type: (Logger, Link) -> bool
        """Return True if the location's origin is considered secure or
        trusted; otherwise log a warning and return False.
        """
        # Determine if this url used a secure transport mechanism
        parsed = urllib_parse.urlparse(str(location))
        origin = (parsed.scheme, parsed.hostname, parsed.port)

        # The protocol to use to see if the protocol matches.
        # Don't count the repository type as part of the protocol: in
        # cases such as "git+ssh", only use "ssh". (I.e., Only verify against
        # the last scheme.)
        protocol = origin[0].rsplit('+', 1)[-1]

        # Determine if our origin is a secure origin by looking through our
        # hardcoded list of secure origins, as well as any additional ones
        # configured on this PackageFinder instance.
        for secure_origin in self.iter_secure_origins():
            if protocol != secure_origin[0] and secure_origin[0] != "*":
                continue

            try:
                # We need to do this decode dance to ensure that we have a
                # unicode object, even on Python 2.x.
                addr = ipaddress.ip_address(
                    origin[1]
                    if (
                        isinstance(origin[1], six.text_type) or
                        origin[1] is None
                    )
                    else origin[1].decode("utf8")
                )
                network = ipaddress.ip_network(
                    secure_origin[1]
                    if isinstance(secure_origin[1], six.text_type)
                    # setting secure_origin[1] to proper Union[bytes, str]
                    # creates problems in other places
                    else secure_origin[1].decode("utf8")  # type: ignore
                )
            except ValueError:
                # We don't have both a valid address or a valid network, so
                # we'll check this origin against hostnames.
                if (origin[1] and
                        origin[1].lower() != secure_origin[1].lower() and
                        secure_origin[1] != "*"):
                    continue
            else:
                # We have a valid address and network, so see if the address
                # is contained within the network.
                if addr not in network:
                    continue

            # Check to see if the port matches
            if (origin[2] != secure_origin[2] and
                    secure_origin[2] != "*" and
                    secure_origin[2] is not None):
                continue

            # If we've gotten here, then this origin matches the current
            # secure origin and we should return True
            return True

        # If we've gotten to this point, then the origin isn't secure and we
        # will not accept it as a valid location to search. We will however
        # log a warning that we are ignoring it.
        logger.warning(
            "The repository located at %s is not a trusted or secure host and "
            "is being ignored. If this repository is available via HTTPS we "
            "recommend you use HTTPS instead, otherwise you may silence "
            "this warning and allow it anyway with '--trusted-host %s'.",
            parsed.hostname,
            parsed.hostname,
        )

        return False

    def make_link_evaluator(self, project_name):
        # type: (str) -> LinkEvaluator
        """Create a LinkEvaluator configured for the given project."""
        canonical_name = canonicalize_name(project_name)
        formats = self.format_control.get_allowed_formats(canonical_name)

        return LinkEvaluator(
            project_name=project_name,
            canonical_name=canonical_name,
            formats=formats,
            target_python=self._target_python,
            allow_yanked=self._allow_yanked,
            ignore_requires_python=self._ignore_requires_python,
        )

    def find_all_candidates(self, project_name):
        # type: (str) -> List[InstallationCandidate]
        """Find all available InstallationCandidate for project_name

        This checks index_urls and find_links.
        All versions found are returned as an InstallationCandidate list.

        See LinkEvaluator.evaluate_link() for details on which files
        are accepted.
        """
        search_scope = self.search_scope
        index_locations = search_scope.get_index_urls_locations(project_name)
        index_file_loc, index_url_loc = self._sort_locations(index_locations)
        fl_file_loc, fl_url_loc = self._sort_locations(
            self.find_links, expand_dir=True,
        )

        file_locations = (Link(url) for url in itertools.chain(
            index_file_loc, fl_file_loc,
        ))

        # We trust every url that the user has given us whether it was given
        # via --index-url or --find-links.
        # We want to filter out any thing which does not have a secure origin.
        url_locations = [
            link for link in itertools.chain(
                (Link(url) for url in index_url_loc),
                (Link(url) for url in fl_url_loc),
            )
            if self._validate_secure_origin(logger, link)
        ]

        logger.debug('%d location(s) to search for versions of %s:',
                     len(url_locations), project_name)

        for location in url_locations:
            logger.debug('* %s', location)

        link_evaluator = self.make_link_evaluator(project_name)
        find_links_versions = self._package_versions(
            link_evaluator,
            # We trust every directly linked archive in find_links
            (Link(url, '-f') for url in self.find_links),
        )

        page_versions = []
        for page in self._get_pages(url_locations, project_name):
            logger.debug('Analyzing links from page %s', page.url)
            with indent_log():
                page_versions.extend(
                    self._package_versions(link_evaluator, page.iter_links())
                )

        file_versions = self._package_versions(link_evaluator, file_locations)
        if file_versions:
            file_versions.sort(reverse=True)
            logger.debug(
                'Local files found: %s',
                ', '.join([
                    url_to_path(candidate.link.url)
                    for candidate in file_versions
                ])
            )

        # This is an intentional priority ordering
        return file_versions + find_links_versions + page_versions

    def make_candidate_evaluator(
        self,
        project_name,    # type: str
        specifier=None,  # type: Optional[specifiers.BaseSpecifier]
        hashes=None,     # type: Optional[Hashes]
    ):
        # type: (...) -> CandidateEvaluator
        """Create a CandidateEvaluator object to use.
        """
        candidate_prefs = self._candidate_prefs
        return CandidateEvaluator.create(
            project_name=project_name,
            target_python=self._target_python,
            prefer_binary=candidate_prefs.prefer_binary,
            allow_all_prereleases=candidate_prefs.allow_all_prereleases,
            specifier=specifier,
            hashes=hashes,
        )

    def find_candidates(
        self,
        project_name,    # type: str
        specifier=None,  # type: Optional[specifiers.BaseSpecifier]
        hashes=None,     # type: Optional[Hashes]
    ):
        # type: (...) -> FoundCandidates
        """Find matches for the given project and specifier.

        :param specifier: An optional object implementing `filter`
            (e.g. `packaging.specifiers.SpecifierSet`) to filter applicable
            versions.

        :return: A `FoundCandidates` instance.
        """
        candidates = self.find_all_candidates(project_name)
        candidate_evaluator = self.make_candidate_evaluator(
            project_name=project_name,
            specifier=specifier,
            hashes=hashes,
        )
        return candidate_evaluator.make_found_candidates(candidates)

    def find_requirement(self, req, upgrade):
        # type: (InstallRequirement, bool) -> Optional[Link]
        """Try to find a Link matching req

        Expects req, an InstallRequirement and upgrade, a boolean
        Returns a Link if found,
        Raises DistributionNotFound or BestVersionAlreadyInstalled otherwise
        """
        hashes = req.hashes(trust_internet=False)
        candidates = self.find_candidates(
            req.name, specifier=req.specifier, hashes=hashes,
        )
        best_candidate = candidates.get_best()

        installed_version = None    # type: Optional[_BaseVersion]
        if req.satisfied_by is not None:
            installed_version = parse_version(req.satisfied_by.version)

        def _format_versions(cand_iter):
            # This repeated parse_version and str() conversion is needed to
            # handle different vendoring sources from pip and pkg_resources.
            # If we stop using the pkg_resources provided specifier and start
            # using our own, we can drop the cast to str().
            return ", ".join(sorted(
                {str(c.version) for c in cand_iter},
                key=parse_version,
            )) or "none"

        if installed_version is None and best_candidate is None:
            logger.critical(
                'Could not find a version that satisfies the requirement %s '
                '(from versions: %s)',
                req,
                _format_versions(candidates.iter_all()),
            )

            raise DistributionNotFound(
                'No matching distribution found for %s' % req
            )

        best_installed = False
        if installed_version and (
                best_candidate is None or
                best_candidate.version <= installed_version):
            best_installed = True

        if not upgrade and installed_version is not None:
            if best_installed:
                logger.debug(
                    'Existing installed version (%s) is most up-to-date and '
                    'satisfies requirement',
                    installed_version,
                )
            else:
                logger.debug(
                    'Existing installed version (%s) satisfies requirement '
                    '(most up-to-date version is %s)',
                    installed_version,
                    best_candidate.version,
                )
            return None

        if best_installed:
            # We have an existing version, and its the best version
            logger.debug(
                'Installed version (%s) is most up-to-date (past versions: '
                '%s)',
                installed_version,
                _format_versions(candidates.iter_applicable()),
            )
            raise BestVersionAlreadyInstalled

        logger.debug(
            'Using version %s (newest of versions: %s)',
            best_candidate.version,
            _format_versions(candidates.iter_applicable()),
        )
        return best_candidate.link

    def _get_pages(self, locations, project_name):
        # type: (Iterable[Link], str) -> Iterable[HTMLPage]
        """
        Yields HTMLPage objects fetched from the given locations, skipping
        duplicate locations and locations that have errors.
        """
        seen = set()  # type: Set[Link]
        for location in locations:
            if location in seen:
                continue
            seen.add(location)

            page = _get_html_page(location, session=self.session)
            if page is None:
                continue

            yield page

    def _sort_links(self, links):
        # type: (Iterable[Link]) -> List[Link]
        """
        Returns elements of links in order, non-egg links first, egg links
        second, while eliminating duplicates
        """
        eggs, no_eggs = [], []
        seen = set()  # type: Set[Link]
        for link in links:
            if link not in seen:
                seen.add(link)
                if link.egg_fragment:
                    eggs.append(link)
                else:
                    no_eggs.append(link)
        return no_eggs + eggs

    def _log_skipped_link(self, link, reason):
        # type: (Link, Text) -> None
        """Log (once per link) why a link was skipped."""
        if link not in self._logged_links:
            # Mark this as a unicode string to prevent "UnicodeEncodeError:
            # 'ascii' codec can't encode character" in Python 2 when
            # the reason contains non-ascii characters.
            # Also, put the link at the end so the reason is more visible
            # and because the link string is usually very long.
            logger.debug(u'Skipping link: %s: %s', reason, link)
            self._logged_links.add(link)

    def get_install_candidate(self, link_evaluator, link):
        # type: (LinkEvaluator, Link) -> Optional[InstallationCandidate]
        """
        If the link is a candidate for install, convert it to an
        InstallationCandidate and return it. Otherwise, return None.
        """
        is_candidate, result = link_evaluator.evaluate_link(link)
        if not is_candidate:
            if result:
                self._log_skipped_link(link, reason=result)
            return None

        return InstallationCandidate(
            project=link_evaluator.project_name,
            link=link,
            # Convert the Text result to str since InstallationCandidate
            # accepts str.
            version=str(result),
        )

    def _package_versions(self, link_evaluator, links):
        # type: (LinkEvaluator, Iterable[Link]) -> List[InstallationCandidate]
        """Evaluate each link and collect the resulting candidates."""
        result = []
        for link in self._sort_links(links):
            candidate = self.get_install_candidate(link_evaluator, link)
            if candidate is not None:
                result.append(candidate)
        return result
| 1344 | |
| 1345 | |
def _find_name_version_sep(fragment, canonical_name):
    # type: (str, str) -> int
    """Locate the dash separating the name and version in a fragment.

    :param fragment: A <package>+<version> filename "fragment" (stem) or
        egg fragment.
    :param canonical_name: The package's canonical name.
    :raises ValueError: If no separator can be found.

    This function is needed since the canonicalized name does not necessarily
    have the same length as the egg info's name part. An example::

        >>> fragment = 'foo__bar-1.0'
        >>> canonical_name = 'foo-bar'
        >>> _find_name_version_sep(fragment, canonical_name)
        8
    """
    # The name and version must be separated by exactly one dash. Walk the
    # dash positions left to right; the first prefix that canonicalizes to
    # the package's canonical name marks the separator.
    dash = fragment.find("-")
    while dash != -1:
        if canonicalize_name(fragment[:dash]) == canonical_name:
            return dash
        dash = fragment.find("-", dash + 1)
    raise ValueError("{} does not match {}".format(fragment, canonical_name))
| 1371 | |
| 1372 | |
def _extract_version_from_fragment(fragment, canonical_name):
    # type: (str, str) -> Optional[str]
    """Return the version part of a <package>+<version> filename
    "fragment" (stem) or egg fragment, or None if it cannot be parsed.

    :param fragment: The string to parse. E.g. foo-2.1
    :param canonical_name: The canonicalized name of the package this
        belongs to.
    """
    try:
        sep = _find_name_version_sep(fragment, canonical_name)
    except ValueError:
        # No separator means the fragment does not belong to this package.
        return None
    version = fragment[sep + 1:]
    # An empty trailing version is treated the same as no version at all.
    return version or None
| 1390 | |
| 1391 | |
| 1392 def _determine_base_url(document, page_url): | |
| 1393 """Determine the HTML document's base URL. | |
| 1394 | |
| 1395 This looks for a ``<base>`` tag in the HTML document. If present, its href | |
| 1396 attribute denotes the base URL of anchor tags in the document. If there is | |
| 1397 no such tag (or if it does not have a valid href attribute), the HTML | |
| 1398 file's URL is used as the base URL. | |
| 1399 | |
| 1400 :param document: An HTML document representation. The current | |
| 1401 implementation expects the result of ``html5lib.parse()``. | |
| 1402 :param page_url: The URL of the HTML document. | |
| 1403 """ | |
| 1404 for base in document.findall(".//base"): | |
| 1405 href = base.get("href") | |
| 1406 if href is not None: | |
| 1407 return href | |
| 1408 return page_url | |
| 1409 | |
| 1410 | |
| 1411 def _get_encoding_from_headers(headers): | |
| 1412 """Determine if we have any encoding information in our headers. | |
| 1413 """ | |
| 1414 if headers and "Content-Type" in headers: | |
| 1415 content_type, params = cgi.parse_header(headers["Content-Type"]) | |
| 1416 if "charset" in params: | |
| 1417 return params['charset'] | |
| 1418 return None | |
| 1419 | |
| 1420 | |
| 1421 def _clean_link(url): | |
| 1422 # type: (str) -> str | |
| 1423 """Makes sure a link is fully encoded. That is, if a ' ' shows up in | |
| 1424 the link, it will be rewritten to %20 (while not over-quoting | |
| 1425 % or other characters).""" | |
| 1426 # Split the URL into parts according to the general structure | |
| 1427 # `scheme://netloc/path;parameters?query#fragment`. Note that the | |
| 1428 # `netloc` can be empty and the URI will then refer to a local | |
| 1429 # filesystem path. | |
| 1430 result = urllib_parse.urlparse(url) | |
| 1431 # In both cases below we unquote prior to quoting to make sure | |
| 1432 # nothing is double quoted. | |
| 1433 if result.netloc == "": | |
| 1434 # On Windows the path part might contain a drive letter which | |
| 1435 # should not be quoted. On Linux where drive letters do not | |
| 1436 # exist, the colon should be quoted. We rely on urllib.request | |
| 1437 # to do the right thing here. | |
| 1438 path = urllib_request.pathname2url( | |
| 1439 urllib_request.url2pathname(result.path)) | |
| 1440 else: | |
| 1441 # In addition to the `/` character we protect `@` so that | |
| 1442 # revision strings in VCS URLs are properly parsed. | |
| 1443 path = urllib_parse.quote(urllib_parse.unquote(result.path), safe="/@") | |
| 1444 return urllib_parse.urlunparse(result._replace(path=path)) | |
| 1445 | |
| 1446 | |
def _create_link_from_element(
    anchor,    # type: HTMLElement
    page_url,  # type: str
    base_url,  # type: str
):
    # type: (...) -> Optional[Link]
    """Build a Link from an anchor element on a simple repository page.

    Returns None when the anchor carries no (or an empty) href.
    """
    href = anchor.get("href")
    if not href:
        return None

    url = _clean_link(urllib_parse.urljoin(base_url, href))

    pyrequire = anchor.get('data-requires-python')
    if pyrequire:
        # HTML-unescape the Requires-Python expression.
        pyrequire = unescape(pyrequire)
    else:
        pyrequire = None

    yanked_reason = anchor.get('data-yanked')
    if yanked_reason:
        # This is a unicode string in Python 2 (and 3).
        yanked_reason = unescape(yanked_reason)

    return Link(
        url,
        comes_from=page_url,
        requires_python=pyrequire,
        yanked_reason=yanked_reason,
    )
| 1477 | |
| 1478 | |
class HTMLPage(object):
    """One fetched HTML page: its raw content, URL and response headers."""

    def __init__(self, content, url, headers=None):
        # type: (bytes, str, MutableMapping[str, str]) -> None
        self.content = content
        self.url = url
        self.headers = headers

    def __str__(self):
        # Never expose credentials embedded in the URL.
        return redact_password_from_url(self.url)

    def iter_links(self):
        # type: () -> Iterable[Link]
        """Yield a Link for every usable anchor element in the page."""
        encoding = _get_encoding_from_headers(self.headers)
        document = html5lib.parse(
            self.content,
            transport_encoding=encoding,
            namespaceHTMLElements=False,
        )
        base_url = _determine_base_url(document, self.url)
        for anchor in document.findall(".//a"):
            link = _create_link_from_element(
                anchor,
                page_url=self.url,
                base_url=base_url,
            )
            if link is not None:
                yield link
