Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/pip/_internal/index.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 """Routines related to PyPI, indexes""" | |
2 from __future__ import absolute_import | |
3 | |
4 import cgi | |
5 import itertools | |
6 import logging | |
7 import mimetypes | |
8 import os | |
9 import re | |
10 | |
11 from pip._vendor import html5lib, requests, six | |
12 from pip._vendor.distlib.compat import unescape | |
13 from pip._vendor.packaging import specifiers | |
14 from pip._vendor.packaging.utils import canonicalize_name | |
15 from pip._vendor.packaging.version import parse as parse_version | |
16 from pip._vendor.requests.exceptions import HTTPError, RetryError, SSLError | |
17 from pip._vendor.six.moves.urllib import parse as urllib_parse | |
18 from pip._vendor.six.moves.urllib import request as urllib_request | |
19 | |
20 from pip._internal.download import is_url, url_to_path | |
21 from pip._internal.exceptions import ( | |
22 BestVersionAlreadyInstalled, DistributionNotFound, InvalidWheelFilename, | |
23 UnsupportedWheel, | |
24 ) | |
25 from pip._internal.models.candidate import InstallationCandidate | |
26 from pip._internal.models.format_control import FormatControl | |
27 from pip._internal.models.link import Link | |
28 from pip._internal.models.selection_prefs import SelectionPreferences | |
29 from pip._internal.models.target_python import TargetPython | |
30 from pip._internal.utils.compat import ipaddress | |
31 from pip._internal.utils.logging import indent_log | |
32 from pip._internal.utils.misc import ( | |
33 ARCHIVE_EXTENSIONS, SUPPORTED_EXTENSIONS, WHEEL_EXTENSION, path_to_url, | |
34 redact_password_from_url, | |
35 ) | |
36 from pip._internal.utils.packaging import check_requires_python | |
37 from pip._internal.utils.typing import MYPY_CHECK_RUNNING | |
38 from pip._internal.wheel import Wheel | |
39 | |
40 if MYPY_CHECK_RUNNING: | |
41 from logging import Logger | |
42 from typing import ( | |
43 Any, Callable, FrozenSet, Iterable, Iterator, List, MutableMapping, | |
44 Optional, Sequence, Set, Text, Tuple, Union, | |
45 ) | |
46 import xml.etree.ElementTree | |
47 from pip._vendor.packaging.version import _BaseVersion | |
48 from pip._vendor.requests import Response | |
49 from pip._internal.models.search_scope import SearchScope | |
50 from pip._internal.req import InstallRequirement | |
51 from pip._internal.download import PipSession | |
52 from pip._internal.pep425tags import Pep425Tag | |
53 from pip._internal.utils.hashes import Hashes | |
54 | |
55 BuildTag = Tuple[Any, ...] # either empty tuple or Tuple[int, str] | |
56 CandidateSortingKey = ( | |
57 Tuple[int, int, int, _BaseVersion, BuildTag, Optional[int]] | |
58 ) | |
59 HTMLElement = xml.etree.ElementTree.Element | |
60 SecureOrigin = Tuple[str, str, Optional[str]] | |
61 | |
62 | |
# Public API of this module.
__all__ = ['FormatControl', 'FoundCandidates', 'PackageFinder']


SECURE_ORIGINS = [
    # protocol, hostname, port
    # Taken from Chrome's list of secure origins (See: http://bit.ly/1qrySKC)
    ("https", "*", "*"),
    ("*", "localhost", "*"),
    ("*", "127.0.0.0/8", "*"),
    ("*", "::1/128", "*"),
    ("file", "*", None),
    # ssh is always secure.
    ("ssh", "*", "*"),
]  # type: List[SecureOrigin]


# Module-level logger shared by all helpers in this module.
logger = logging.getLogger(__name__)
80 | |
81 | |
def _match_vcs_scheme(url):
    # type: (str) -> Optional[str]
    """Look for VCS schemes in the URL.

    :param url: The URL to inspect.
    :return: The matched VCS scheme, or None if there's no match.
    """
    # Imported lazily to avoid a circular import at module load time.
    from pip._internal.vcs import vcs
    for scheme in vcs.schemes:
        # Guard the index lookup: a URL that is exactly the scheme name
        # (e.g. "git") would otherwise raise IndexError.
        if (
            url.lower().startswith(scheme) and
            len(url) > len(scheme) and
            url[len(scheme)] in '+:'
        ):
            return scheme
    return None
93 | |
94 | |
def _is_url_like_archive(url):
    # type: (str) -> bool
    """Return True when the URL's filename carries a known archive extension."""
    # str.endswith accepts a tuple of suffixes, checking them all at once.
    return Link(url).filename.endswith(tuple(ARCHIVE_EXTENSIONS))
104 | |
105 | |
106 class _NotHTML(Exception): | |
107 def __init__(self, content_type, request_desc): | |
108 # type: (str, str) -> None | |
109 super(_NotHTML, self).__init__(content_type, request_desc) | |
110 self.content_type = content_type | |
111 self.request_desc = request_desc | |
112 | |
113 | |
114 def _ensure_html_header(response): | |
115 # type: (Response) -> None | |
116 """Check the Content-Type header to ensure the response contains HTML. | |
117 | |
118 Raises `_NotHTML` if the content type is not text/html. | |
119 """ | |
120 content_type = response.headers.get("Content-Type", "") | |
121 if not content_type.lower().startswith("text/html"): | |
122 raise _NotHTML(content_type, response.request.method) | |
123 | |
124 | |
class _NotHTTP(Exception):
    """Raised when a URL does not use the http or https scheme."""
    pass
127 | |
128 | |
def _ensure_html_response(url, session):
    # type: (str, PipSession) -> None
    """Issue a HEAD request for the URL and verify the response is HTML.

    Raises `_NotHTTP` if the URL is not available for a HEAD request, or
    `_NotHTML` if the content type is not text/html.
    """
    # Only plain web URLs can be probed with HEAD.
    parts = urllib_parse.urlsplit(url)
    if parts.scheme not in ('http', 'https'):
        raise _NotHTTP()

    head_resp = session.head(url, allow_redirects=True)
    head_resp.raise_for_status()

    _ensure_html_header(head_resp)
144 | |
145 | |
def _get_html_response(url, session):
    # type: (str, PipSession) -> Response
    """Fetch `url` with GET and return the verified-HTML response.

    This happens in three steps:

    1. When the URL looks like an archive, issue a HEAD request first so
       a large non-HTML file is never downloaded.  `_NotHTTP` is raised
       when the content type cannot be determined that way, `_NotHTML`
       when it is determined not to be HTML.
    2. Perform the real GET; network failures surface as HTTP exceptions.
    3. Re-check the Content-Type header of the actual response, raising
       `_NotHTML` when it is not HTML after all.
    """
    if _is_url_like_archive(url):
        _ensure_html_response(url, session=session)

    logger.debug('Getting page %s', redact_password_from_url(url))

    # "Cache-Control: max-age=0" (rather than "no-cache") means cached
    # /simple/ data is never returned blindly -- authors generally expect
    # that "twine upload && pip install" works even when a pip install
    # happened in the last ~10 minutes -- while conditional requests keep
    # working, so an unchanged page only costs the round trip for the
    # conditional GET instead of a full download.
    # For more information, please see pypa/pip#5670.
    request_headers = {
        "Accept": "text/html",
        "Cache-Control": "max-age=0",
    }
    resp = session.get(url, headers=request_headers)
    resp.raise_for_status()

    # The archive check above only fires for URLs that *end* like an
    # archive, which a URL is not required to do.  Short of HEAD-probing
    # every URL we cannot know ahead of time, so verify the downloaded
    # response as well.
    _ensure_html_header(resp)

    return resp
195 | |
196 | |
197 def _handle_get_page_fail( | |
198 link, # type: Link | |
199 reason, # type: Union[str, Exception] | |
200 meth=None # type: Optional[Callable[..., None]] | |
201 ): | |
202 # type: (...) -> None | |
203 if meth is None: | |
204 meth = logger.debug | |
205 meth("Could not fetch URL %s: %s - skipping", link, reason) | |
206 | |
207 | |
def _get_html_page(link, session=None):
    # type: (Link, Optional[PipSession]) -> Optional[HTMLPage]
    """Fetch and return the HTML page for `link`, or None if unavailable.

    Every fetch problem (non-HTML content, HTTP error, retry exhaustion,
    SSL failure, connection error, timeout) is logged and swallowed; the
    caller simply receives None in those cases.
    """
    if session is None:
        raise TypeError(
            "_get_html_page() missing 1 required keyword argument: 'session'"
        )

    # Drop any #fragment before fetching.
    url = link.url.split('#', 1)[0]

    # Check for VCS schemes that do not support lookup as web pages.
    vcs_scheme = _match_vcs_scheme(url)
    if vcs_scheme:
        logger.debug('Cannot look at %s URL %s', vcs_scheme, link)
        return None

    # Tack index.html onto file:// URLs that point to directories
    scheme, _, path, _, _, _ = urllib_parse.urlparse(url)
    if (scheme == 'file' and os.path.isdir(urllib_request.url2pathname(path))):
        # add trailing slash if not present so urljoin doesn't trim
        # final segment
        if not url.endswith('/'):
            url += '/'
        url = urllib_parse.urljoin(url, 'index.html')
        logger.debug(' file: URL is directory, getting %s', url)

    try:
        resp = _get_html_response(url, session=session)
    except _NotHTTP:
        logger.debug(
            'Skipping page %s because it looks like an archive, and cannot '
            'be checked by HEAD.', link,
        )
    except _NotHTML as exc:
        logger.debug(
            'Skipping page %s because the %s request got Content-Type: %s',
            link, exc.request_desc, exc.content_type,
        )
    except HTTPError as exc:
        _handle_get_page_fail(link, exc)
    except RetryError as exc:
        _handle_get_page_fail(link, exc)
    except SSLError as exc:
        reason = "There was a problem confirming the ssl certificate: "
        reason += str(exc)
        # SSL problems are surfaced at INFO level rather than DEBUG.
        _handle_get_page_fail(link, reason, meth=logger.info)
    except requests.ConnectionError as exc:
        _handle_get_page_fail(link, "connection error: %s" % exc)
    except requests.Timeout:
        _handle_get_page_fail(link, "timed out")
    else:
        # Success: wrap the raw response for the caller.
        return HTMLPage(resp.content, resp.url, resp.headers)
    return None
260 | |
261 | |
def _check_link_requires_python(
    link,  # type: Link
    version_info,  # type: Tuple[int, int, int]
    ignore_requires_python=False,  # type: bool
):
    # type: (...) -> bool
    """
    Return whether the given Python version is compatible with a link's
    "Requires-Python" value.

    :param version_info: A 3-tuple of ints representing the Python
        major-minor-micro version to check.
    :param ignore_requires_python: Whether to ignore the "Requires-Python"
        value if the given Python version isn't compatible.
    """
    try:
        is_compatible = check_requires_python(
            link.requires_python, version_info=version_info,
        )
    except specifiers.InvalidSpecifier:
        # An unparseable Requires-Python is treated as compatible.
        logger.debug(
            "Ignoring invalid Requires-Python (%r) for link: %s",
            link.requires_python, link,
        )
        return True

    if is_compatible:
        return True

    version = '.'.join(map(str, version_info))
    if not ignore_requires_python:
        logger.debug(
            'Link requires a different Python (%s not in: %r): %s',
            version, link.requires_python, link,
        )
        return False

    # The check failed but the caller asked us to ignore that.
    logger.debug(
        'Ignoring failed Requires-Python check (%s not in: %r) '
        'for link: %s',
        version, link.requires_python, link,
    )
    return True
303 | |
304 | |
class LinkEvaluator(object):

    """
    Responsible for evaluating links for a particular project.
    """

    # Matches a trailing "-pyX[.Y]" marker in a version string.
    _py_version_re = re.compile(r'-py([123]\.?[0-9]?)$')

    # Don't include an allow_yanked default value to make sure each call
    # site considers whether yanked releases are allowed. This also causes
    # that decision to be made explicit in the calling code, which helps
    # people when reading the code.
    def __init__(
        self,
        project_name,  # type: str
        canonical_name,  # type: str
        formats,  # type: FrozenSet
        target_python,  # type: TargetPython
        allow_yanked,  # type: bool
        ignore_requires_python=None,  # type: Optional[bool]
    ):
        # type: (...) -> None
        """
        :param project_name: The user supplied package name.
        :param canonical_name: The canonical package name.
        :param formats: The formats allowed for this package. Should be a set
            with 'binary' or 'source' or both in it.
        :param target_python: The target Python interpreter to use when
            evaluating link compatibility. This is used, for example, to
            check wheel compatibility, as well as when checking the Python
            version, e.g. the Python version embedded in a link filename
            (or egg fragment) and against an HTML link's optional PEP 503
            "data-requires-python" attribute.
        :param allow_yanked: Whether files marked as yanked (in the sense
            of PEP 592) are permitted to be candidates for install.
        :param ignore_requires_python: Whether to ignore incompatible
            PEP 503 "data-requires-python" values in HTML links. Defaults
            to False.
        """
        if ignore_requires_python is None:
            ignore_requires_python = False

        self._allow_yanked = allow_yanked
        self._canonical_name = canonical_name
        self._ignore_requires_python = ignore_requires_python
        self._formats = formats
        self._target_python = target_python

        self.project_name = project_name

    def evaluate_link(self, link):
        # type: (Link) -> Tuple[bool, Optional[Text]]
        """
        Determine whether a link is a candidate for installation.

        :return: A tuple (is_candidate, result), where `result` is (1) a
            version string if `is_candidate` is True, and (2) if
            `is_candidate` is False, an optional string to log the reason
            the link fails to qualify.
        """
        version = None
        if link.is_yanked and not self._allow_yanked:
            reason = link.yanked_reason or '<none given>'
            # Mark this as a unicode string to prevent "UnicodeEncodeError:
            # 'ascii' codec can't encode character" in Python 2 when
            # the reason contains non-ascii characters.
            return (False, u'yanked for reason: {}'.format(reason))

        # Prefer the explicit #egg= fragment for name/extension info;
        # otherwise derive both from the link's filename.
        if link.egg_fragment:
            egg_info = link.egg_fragment
            ext = link.ext
        else:
            egg_info, ext = link.splitext()
            if not ext:
                return (False, 'not a file')
            if ext not in SUPPORTED_EXTENSIONS:
                return (False, 'unsupported archive format: %s' % ext)
        if "binary" not in self._formats and ext == WHEEL_EXTENSION:
            reason = 'No binaries permitted for %s' % self.project_name
            return (False, reason)
        # NOTE(review): historical special case -- ".zip" links whose path
        # mentions "macosx10" are rejected outright.
        if "macosx10" in link.path and ext == '.zip':
            return (False, 'macosx10 one')
        if ext == WHEEL_EXTENSION:
            try:
                wheel = Wheel(link.filename)
            except InvalidWheelFilename:
                return (False, 'invalid wheel filename')
            if canonicalize_name(wheel.name) != self._canonical_name:
                reason = 'wrong project name (not %s)' % self.project_name
                return (False, reason)

            supported_tags = self._target_python.get_tags()
            if not wheel.supported(supported_tags):
                # Include the wheel's tags in the reason string to
                # simplify troubleshooting compatibility issues.
                file_tags = wheel.get_formatted_file_tags()
                reason = (
                    "none of the wheel's tags match: {}".format(
                        ', '.join(file_tags)
                    )
                )
                return (False, reason)

            version = wheel.version

        # This should be up by the self.ok_binary check, but see issue 2700.
        if "source" not in self._formats and ext != WHEEL_EXTENSION:
            return (False, 'No sources permitted for %s' % self.project_name)

        if not version:
            version = _extract_version_from_fragment(
                egg_info, self._canonical_name,
            )
        if not version:
            return (
                False, 'Missing project version for %s' % self.project_name,
            )

        # Strip a trailing "-pyX[.Y]" marker and require it to match the
        # target interpreter's version.
        match = self._py_version_re.search(version)
        if match:
            version = version[:match.start()]
            py_version = match.group(1)
            if py_version != self._target_python.py_version:
                return (False, 'Python version is incorrect')

        supports_python = _check_link_requires_python(
            link, version_info=self._target_python.py_version_info,
            ignore_requires_python=self._ignore_requires_python,
        )
        if not supports_python:
            # Return None for the reason text to suppress calling
            # _log_skipped_link().
            return (False, None)

        logger.debug('Found link %s, version: %s', link, version)

        return (True, version)
442 | |
443 | |
def filter_unallowed_hashes(
    candidates,  # type: List[InstallationCandidate]
    hashes,  # type: Hashes
    project_name,  # type: str
):
    # type: (...) -> List[InstallationCandidate]
    """
    Return a new candidate list with hash-rejected candidates removed.

    When at least one candidate carries an allowed hash, the result is
    every candidate whose hash is allowed plus every candidate that has
    no hash at all.  When no candidate matches, all candidates are kept.

    Keeping the hash-less candidates around when there is a match allows
    a warning to be logged if a more preferred candidate has no hash;
    keeping everything when nothing matches lets pip report the hash of
    the candidate that would otherwise have been installed (e.g. so the
    user can more easily update their requirements file with the desired
    hash).
    """
    if not hashes:
        logger.debug(
            'Given no hashes to check %s links for project %r: '
            'discarding no candidates',
            len(candidates),
            project_name,
        )
        # Hand back a copy, never the caller's own list.
        return list(candidates)

    matches_or_no_digest = []
    # The rejected candidates are kept purely for the log message below.
    non_matches = []
    match_count = 0
    for candidate in candidates:
        link = candidate.link
        if link.has_hash and not link.is_hash_allowed(hashes=hashes):
            non_matches.append(candidate)
            continue
        if link.has_hash:
            match_count += 1
        matches_or_no_digest.append(candidate)

    if match_count:
        filtered = matches_or_no_digest
    else:
        # Hand back a copy, never the caller's own list.
        filtered = list(candidates)

    if len(filtered) == len(candidates):
        discard_message = 'discarding no candidates'
    else:
        discard_message = 'discarding {} non-matches:\n  {}'.format(
            len(non_matches),
            '\n  '.join(str(candidate.link) for candidate in non_matches)
        )

    logger.debug(
        'Checked %s links for project %r against %s hashes '
        '(%s matches, %s no digest): %s',
        len(candidates),
        project_name,
        hashes.digest_count,
        match_count,
        len(matches_or_no_digest) - match_count,
        discard_message
    )

    return filtered
517 | |
518 | |
class CandidatePreferences(object):

    """
    A small value object bundling the preferences used when filtering and
    sorting InstallationCandidate objects.
    """

    def __init__(
        self,
        prefer_binary=False,  # type: bool
        allow_all_prereleases=False,  # type: bool
    ):
        # type: (...) -> None
        """
        :param allow_all_prereleases: Whether to allow all pre-releases.
        """
        self.prefer_binary = prefer_binary
        self.allow_all_prereleases = allow_all_prereleases
537 | |
538 | |
class CandidateEvaluator(object):

    """
    Responsible for filtering and sorting candidates for installation based
    on what tags are valid.
    """

    @classmethod
    def create(
        cls,
        project_name,  # type: str
        target_python=None,  # type: Optional[TargetPython]
        prefer_binary=False,  # type: bool
        allow_all_prereleases=False,  # type: bool
        specifier=None,  # type: Optional[specifiers.BaseSpecifier]
        hashes=None,  # type: Optional[Hashes]
    ):
        # type: (...) -> CandidateEvaluator
        """Create a CandidateEvaluator object.

        :param target_python: The target Python interpreter to use when
            checking compatibility. If None (the default), a TargetPython
            object will be constructed from the running Python.
        :param hashes: An optional collection of allowed hashes.
        """
        if target_python is None:
            target_python = TargetPython()
        if specifier is None:
            # An empty SpecifierSet places no restriction on versions.
            specifier = specifiers.SpecifierSet()

        supported_tags = target_python.get_tags()

        return cls(
            project_name=project_name,
            supported_tags=supported_tags,
            specifier=specifier,
            prefer_binary=prefer_binary,
            allow_all_prereleases=allow_all_prereleases,
            hashes=hashes,
        )

    def __init__(
        self,
        project_name,  # type: str
        supported_tags,  # type: List[Pep425Tag]
        specifier,  # type: specifiers.BaseSpecifier
        prefer_binary=False,  # type: bool
        allow_all_prereleases=False,  # type: bool
        hashes=None,  # type: Optional[Hashes]
    ):
        # type: (...) -> None
        """
        :param supported_tags: The PEP 425 tags supported by the target
            Python in order of preference (most preferred first).
        """
        self._allow_all_prereleases = allow_all_prereleases
        self._hashes = hashes
        self._prefer_binary = prefer_binary
        self._project_name = project_name
        self._specifier = specifier
        self._supported_tags = supported_tags

    def get_applicable_candidates(
        self,
        candidates,  # type: List[InstallationCandidate]
    ):
        # type: (...) -> List[InstallationCandidate]
        """
        Return the applicable candidates from a list of candidates.

        A candidate is applicable when its version satisfies the specifier
        (and prerelease policy) and its hash is not disallowed.
        """
        # Using None infers from the specifier instead.
        allow_prereleases = self._allow_all_prereleases or None
        specifier = self._specifier
        versions = {
            str(v) for v in specifier.filter(
                # We turn the version object into a str here because otherwise
                # when we're debundled but setuptools isn't, Python will see
                # packaging.version.Version and
                # pkg_resources._vendor.packaging.version.Version as different
                # types. This way we'll use a str as a common data interchange
                # format. If we stop using the pkg_resources provided specifier
                # and start using our own, we can drop the cast to str().
                (str(c.version) for c in candidates),
                prereleases=allow_prereleases,
            )
        }

        # Again, converting version to str to deal with debundling.
        applicable_candidates = [
            c for c in candidates if str(c.version) in versions
        ]

        return filter_unallowed_hashes(
            candidates=applicable_candidates,
            hashes=self._hashes,
            project_name=self._project_name,
        )

    def make_found_candidates(
        self,
        candidates,  # type: List[InstallationCandidate]
    ):
        # type: (...) -> FoundCandidates
        """
        Create and return a `FoundCandidates` instance.

        :param specifier: An optional object implementing `filter`
            (e.g. `packaging.specifiers.SpecifierSet`) to filter applicable
            versions.
        """
        applicable_candidates = self.get_applicable_candidates(candidates)

        return FoundCandidates(
            candidates,
            applicable_candidates=applicable_candidates,
            evaluator=self,
        )

    def _sort_key(self, candidate):
        # type: (InstallationCandidate) -> CandidateSortingKey
        """
        Function to pass as the `key` argument to a call to sorted() to sort
        InstallationCandidates by preference.

        Returns a tuple such that tuples sorting as greater using Python's
        default comparison operator are more preferred.

        The preference is as follows:

        First and foremost, candidates with allowed (matching) hashes are
        always preferred over candidates without matching hashes. This is
        because e.g. if the only candidate with an allowed hash is yanked,
        we still want to use that candidate.

        Second, excepting hash considerations, candidates that have been
        yanked (in the sense of PEP 592) are always less preferred than
        candidates that haven't been yanked. Then:

        If not finding wheels, they are sorted by version only.
        If finding wheels, then the sort order is by version, then:
          1. existing installs
          2. wheels ordered via Wheel.support_index_min(self._supported_tags)
          3. source archives
        If prefer_binary was set, then all wheels are sorted above sources.

        Note: it was considered to embed this logic into the Link
              comparison operators, but then different sdist links
              with the same version, would have to be considered equal
        """
        valid_tags = self._supported_tags
        support_num = len(valid_tags)
        build_tag = tuple()  # type: BuildTag
        binary_preference = 0
        link = candidate.link
        if link.is_wheel:
            # can raise InvalidWheelFilename
            wheel = Wheel(link.filename)
            if not wheel.supported(valid_tags):
                raise UnsupportedWheel(
                    "%s is not a supported wheel for this platform. It "
                    "can't be sorted." % wheel.filename
                )
            if self._prefer_binary:
                binary_preference = 1
            # Negate so that a *smaller* support index (more preferred tag)
            # yields a *larger* sort key.
            pri = -(wheel.support_index_min(valid_tags))
            if wheel.build_tag is not None:
                # Split the build tag into (numeric, suffix) parts so that
                # builds compare numerically first.
                match = re.match(r'^(\d+)(.*)$', wheel.build_tag)
                build_tag_groups = match.groups()
                build_tag = (int(build_tag_groups[0]), build_tag_groups[1])
        else:  # sdist
            # Sort sdists below every supported wheel.
            pri = -(support_num)
        has_allowed_hash = int(link.is_hash_allowed(self._hashes))
        yank_value = -1 * int(link.is_yanked)  # -1 for yanked.
        return (
            has_allowed_hash, yank_value, binary_preference, candidate.version,
            build_tag, pri,
        )

    def get_best_candidate(
        self,
        candidates,  # type: List[InstallationCandidate]
    ):
        # type: (...) -> Optional[InstallationCandidate]
        """
        Return the best candidate per the instance's sort order, or None if
        no candidate is acceptable.
        """
        if not candidates:
            return None

        best_candidate = max(candidates, key=self._sort_key)

        # Log a warning per PEP 592 if necessary before returning.
        link = best_candidate.link
        if link.is_yanked:
            reason = link.yanked_reason or '<none given>'
            msg = (
                # Mark this as a unicode string to prevent
                # "UnicodeEncodeError: 'ascii' codec can't encode character"
                # in Python 2 when the reason contains non-ascii characters.
                u'The candidate selected for download or install is a '
                'yanked version: {candidate}\n'
                'Reason for being yanked: {reason}'
            ).format(candidate=best_candidate, reason=reason)
            logger.warning(msg)

        return best_candidate
746 | |
747 | |
class FoundCandidates(object):
    """A collection of candidates, returned by `PackageFinder.find_candidates`.

    This class is only intended to be instantiated by CandidateEvaluator's
    `make_found_candidates()` method.
    """

    def __init__(
        self,
        candidates,  # type: List[InstallationCandidate]
        applicable_candidates,  # type: List[InstallationCandidate]
        evaluator,  # type: CandidateEvaluator
    ):
        # type: (...) -> None
        """
        :param candidates: A sequence of all available candidates found.
        :param applicable_candidates: The applicable candidates.
        :param evaluator: A CandidateEvaluator object to sort applicable
            candidates by order of preference.
        """
        self._candidates = candidates
        self._applicable_candidates = applicable_candidates
        self._evaluator = evaluator

    def iter_all(self):
        # type: () -> Iterable[InstallationCandidate]
        """Return an iterator over every candidate that was found."""
        return iter(self._candidates)

    def iter_applicable(self):
        # type: () -> Iterable[InstallationCandidate]
        """Return an iterator over only the applicable candidates."""
        return iter(self._applicable_candidates)

    def get_best(self):
        # type: () -> Optional[InstallationCandidate]
        """Return the best candidate available, or None if no applicable
        candidates are found.
        """
        # Delegate the actual preference ordering to the evaluator.
        applicable = list(self.iter_applicable())
        return self._evaluator.get_best_candidate(applicable)
791 | |
792 | |
793 class PackageFinder(object): | |
794 """This finds packages. | |
795 | |
796 This is meant to match easy_install's technique for looking for | |
797 packages, by reading pages and looking for appropriate links. | |
798 """ | |
799 | |
    def __init__(
        self,
        search_scope,  # type: SearchScope
        session,  # type: PipSession
        target_python,  # type: TargetPython
        allow_yanked,  # type: bool
        format_control=None,  # type: Optional[FormatControl]
        trusted_hosts=None,  # type: Optional[List[str]]
        candidate_prefs=None,  # type: CandidatePreferences
        ignore_requires_python=None,  # type: Optional[bool]
    ):
        # type: (...) -> None
        """
        This constructor is primarily meant to be used by the create() class
        method and from tests.

        :param session: The Session to use to make requests.
        :param format_control: A FormatControl object, used to control
            the selection of source packages / binary packages when consulting
            the index and links.
        :param candidate_prefs: Options to use when creating a
            CandidateEvaluator object.
        """
        # Normalize the optional collaborators so attribute types are stable.
        if trusted_hosts is None:
            trusted_hosts = []
        if candidate_prefs is None:
            candidate_prefs = CandidatePreferences()

        format_control = format_control or FormatControl(set(), set())

        self._allow_yanked = allow_yanked
        self._candidate_prefs = candidate_prefs
        self._ignore_requires_python = ignore_requires_python
        self._target_python = target_python

        self.search_scope = search_scope
        self.session = session
        self.format_control = format_control
        self.trusted_hosts = trusted_hosts

        # These are boring links that have already been logged somehow.
        self._logged_links = set()  # type: Set[Link]
842 | |
843 # Don't include an allow_yanked default value to make sure each call | |
844 # site considers whether yanked releases are allowed. This also causes | |
845 # that decision to be made explicit in the calling code, which helps | |
846 # people when reading the code. | |
847 @classmethod | |
848 def create( | |
849 cls, | |
850 search_scope, # type: SearchScope | |
851 selection_prefs, # type: SelectionPreferences | |
852 trusted_hosts=None, # type: Optional[List[str]] | |
853 session=None, # type: Optional[PipSession] | |
854 target_python=None, # type: Optional[TargetPython] | |
855 ): | |
856 # type: (...) -> PackageFinder | |
857 """Create a PackageFinder. | |
858 | |
859 :param selection_prefs: The candidate selection preferences, as a | |
860 SelectionPreferences object. | |
861 :param trusted_hosts: Domains not to emit warnings for when not using | |
862 HTTPS. | |
863 :param session: The Session to use to make requests. | |
864 :param target_python: The target Python interpreter to use when | |
865 checking compatibility. If None (the default), a TargetPython | |
866 object will be constructed from the running Python. | |
867 """ | |
868 if session is None: | |
869 raise TypeError( | |
870 "PackageFinder.create() missing 1 required keyword argument: " | |
871 "'session'" | |
872 ) | |
873 if target_python is None: | |
874 target_python = TargetPython() | |
875 | |
876 candidate_prefs = CandidatePreferences( | |
877 prefer_binary=selection_prefs.prefer_binary, | |
878 allow_all_prereleases=selection_prefs.allow_all_prereleases, | |
879 ) | |
880 | |
881 return cls( | |
882 candidate_prefs=candidate_prefs, | |
883 search_scope=search_scope, | |
884 session=session, | |
885 target_python=target_python, | |
886 allow_yanked=selection_prefs.allow_yanked, | |
887 format_control=selection_prefs.format_control, | |
888 trusted_hosts=trusted_hosts, | |
889 ignore_requires_python=selection_prefs.ignore_requires_python, | |
890 ) | |
891 | |
892 @property | |
893 def find_links(self): | |
894 # type: () -> List[str] | |
895 return self.search_scope.find_links | |
896 | |
897 @property | |
898 def index_urls(self): | |
899 # type: () -> List[str] | |
900 return self.search_scope.index_urls | |
901 | |
902 @property | |
903 def allow_all_prereleases(self): | |
904 # type: () -> bool | |
905 return self._candidate_prefs.allow_all_prereleases | |
906 | |
907 def set_allow_all_prereleases(self): | |
908 # type: () -> None | |
909 self._candidate_prefs.allow_all_prereleases = True | |
910 | |
911 def add_trusted_host(self, host, source=None): | |
912 # type: (str, Optional[str]) -> None | |
913 """ | |
914 :param source: An optional source string, for logging where the host | |
915 string came from. | |
916 """ | |
917 # It is okay to add a previously added host because PipSession stores | |
918 # the resulting prefixes in a dict. | |
919 msg = 'adding trusted host: {!r}'.format(host) | |
920 if source is not None: | |
921 msg += ' (from {})'.format(source) | |
922 logger.info(msg) | |
923 self.session.add_insecure_host(host) | |
924 if host in self.trusted_hosts: | |
925 return | |
926 | |
927 self.trusted_hosts.append(host) | |
928 | |
929 def iter_secure_origins(self): | |
930 # type: () -> Iterator[SecureOrigin] | |
931 for secure_origin in SECURE_ORIGINS: | |
932 yield secure_origin | |
933 for host in self.trusted_hosts: | |
934 yield ('*', host, '*') | |
935 | |
936 @staticmethod | |
937 def _sort_locations(locations, expand_dir=False): | |
938 # type: (Sequence[str], bool) -> Tuple[List[str], List[str]] | |
939 """ | |
940 Sort locations into "files" (archives) and "urls", and return | |
941 a pair of lists (files,urls) | |
942 """ | |
943 files = [] | |
944 urls = [] | |
945 | |
946 # puts the url for the given file path into the appropriate list | |
947 def sort_path(path): | |
948 url = path_to_url(path) | |
949 if mimetypes.guess_type(url, strict=False)[0] == 'text/html': | |
950 urls.append(url) | |
951 else: | |
952 files.append(url) | |
953 | |
954 for url in locations: | |
955 | |
956 is_local_path = os.path.exists(url) | |
957 is_file_url = url.startswith('file:') | |
958 | |
959 if is_local_path or is_file_url: | |
960 if is_local_path: | |
961 path = url | |
962 else: | |
963 path = url_to_path(url) | |
964 if os.path.isdir(path): | |
965 if expand_dir: | |
966 path = os.path.realpath(path) | |
967 for item in os.listdir(path): | |
968 sort_path(os.path.join(path, item)) | |
969 elif is_file_url: | |
970 urls.append(url) | |
971 else: | |
972 logger.warning( | |
973 "Path '{0}' is ignored: " | |
974 "it is a directory.".format(path), | |
975 ) | |
976 elif os.path.isfile(path): | |
977 sort_path(path) | |
978 else: | |
979 logger.warning( | |
980 "Url '%s' is ignored: it is neither a file " | |
981 "nor a directory.", url, | |
982 ) | |
983 elif is_url(url): | |
984 # Only add url with clear scheme | |
985 urls.append(url) | |
986 else: | |
987 logger.warning( | |
988 "Url '%s' is ignored. It is either a non-existing " | |
989 "path or lacks a specific scheme.", url, | |
990 ) | |
991 | |
992 return files, urls | |
993 | |
994 def _validate_secure_origin(self, logger, location): | |
995 # type: (Logger, Link) -> bool | |
996 # Determine if this url used a secure transport mechanism | |
997 parsed = urllib_parse.urlparse(str(location)) | |
998 origin = (parsed.scheme, parsed.hostname, parsed.port) | |
999 | |
1000 # The protocol to use to see if the protocol matches. | |
1001 # Don't count the repository type as part of the protocol: in | |
1002 # cases such as "git+ssh", only use "ssh". (I.e., Only verify against | |
1003 # the last scheme.) | |
1004 protocol = origin[0].rsplit('+', 1)[-1] | |
1005 | |
1006 # Determine if our origin is a secure origin by looking through our | |
1007 # hardcoded list of secure origins, as well as any additional ones | |
1008 # configured on this PackageFinder instance. | |
1009 for secure_origin in self.iter_secure_origins(): | |
1010 if protocol != secure_origin[0] and secure_origin[0] != "*": | |
1011 continue | |
1012 | |
1013 try: | |
1014 # We need to do this decode dance to ensure that we have a | |
1015 # unicode object, even on Python 2.x. | |
1016 addr = ipaddress.ip_address( | |
1017 origin[1] | |
1018 if ( | |
1019 isinstance(origin[1], six.text_type) or | |
1020 origin[1] is None | |
1021 ) | |
1022 else origin[1].decode("utf8") | |
1023 ) | |
1024 network = ipaddress.ip_network( | |
1025 secure_origin[1] | |
1026 if isinstance(secure_origin[1], six.text_type) | |
1027 # setting secure_origin[1] to proper Union[bytes, str] | |
1028 # creates problems in other places | |
1029 else secure_origin[1].decode("utf8") # type: ignore | |
1030 ) | |
1031 except ValueError: | |
1032 # We don't have both a valid address or a valid network, so | |
1033 # we'll check this origin against hostnames. | |
1034 if (origin[1] and | |
1035 origin[1].lower() != secure_origin[1].lower() and | |
1036 secure_origin[1] != "*"): | |
1037 continue | |
1038 else: | |
1039 # We have a valid address and network, so see if the address | |
1040 # is contained within the network. | |
1041 if addr not in network: | |
1042 continue | |
1043 | |
1044 # Check to see if the port patches | |
1045 if (origin[2] != secure_origin[2] and | |
1046 secure_origin[2] != "*" and | |
1047 secure_origin[2] is not None): | |
1048 continue | |
1049 | |
1050 # If we've gotten here, then this origin matches the current | |
1051 # secure origin and we should return True | |
1052 return True | |
1053 | |
1054 # If we've gotten to this point, then the origin isn't secure and we | |
1055 # will not accept it as a valid location to search. We will however | |
1056 # log a warning that we are ignoring it. | |
1057 logger.warning( | |
1058 "The repository located at %s is not a trusted or secure host and " | |
1059 "is being ignored. If this repository is available via HTTPS we " | |
1060 "recommend you use HTTPS instead, otherwise you may silence " | |
1061 "this warning and allow it anyway with '--trusted-host %s'.", | |
1062 parsed.hostname, | |
1063 parsed.hostname, | |
1064 ) | |
1065 | |
1066 return False | |
1067 | |
1068 def make_link_evaluator(self, project_name): | |
1069 # type: (str) -> LinkEvaluator | |
1070 canonical_name = canonicalize_name(project_name) | |
1071 formats = self.format_control.get_allowed_formats(canonical_name) | |
1072 | |
1073 return LinkEvaluator( | |
1074 project_name=project_name, | |
1075 canonical_name=canonical_name, | |
1076 formats=formats, | |
1077 target_python=self._target_python, | |
1078 allow_yanked=self._allow_yanked, | |
1079 ignore_requires_python=self._ignore_requires_python, | |
1080 ) | |
1081 | |
1082 def find_all_candidates(self, project_name): | |
1083 # type: (str) -> List[InstallationCandidate] | |
1084 """Find all available InstallationCandidate for project_name | |
1085 | |
1086 This checks index_urls and find_links. | |
1087 All versions found are returned as an InstallationCandidate list. | |
1088 | |
1089 See LinkEvaluator.evaluate_link() for details on which files | |
1090 are accepted. | |
1091 """ | |
1092 search_scope = self.search_scope | |
1093 index_locations = search_scope.get_index_urls_locations(project_name) | |
1094 index_file_loc, index_url_loc = self._sort_locations(index_locations) | |
1095 fl_file_loc, fl_url_loc = self._sort_locations( | |
1096 self.find_links, expand_dir=True, | |
1097 ) | |
1098 | |
1099 file_locations = (Link(url) for url in itertools.chain( | |
1100 index_file_loc, fl_file_loc, | |
1101 )) | |
1102 | |
1103 # We trust every url that the user has given us whether it was given | |
1104 # via --index-url or --find-links. | |
1105 # We want to filter out any thing which does not have a secure origin. | |
1106 url_locations = [ | |
1107 link for link in itertools.chain( | |
1108 (Link(url) for url in index_url_loc), | |
1109 (Link(url) for url in fl_url_loc), | |
1110 ) | |
1111 if self._validate_secure_origin(logger, link) | |
1112 ] | |
1113 | |
1114 logger.debug('%d location(s) to search for versions of %s:', | |
1115 len(url_locations), project_name) | |
1116 | |
1117 for location in url_locations: | |
1118 logger.debug('* %s', location) | |
1119 | |
1120 link_evaluator = self.make_link_evaluator(project_name) | |
1121 find_links_versions = self._package_versions( | |
1122 link_evaluator, | |
1123 # We trust every directly linked archive in find_links | |
1124 (Link(url, '-f') for url in self.find_links), | |
1125 ) | |
1126 | |
1127 page_versions = [] | |
1128 for page in self._get_pages(url_locations, project_name): | |
1129 logger.debug('Analyzing links from page %s', page.url) | |
1130 with indent_log(): | |
1131 page_versions.extend( | |
1132 self._package_versions(link_evaluator, page.iter_links()) | |
1133 ) | |
1134 | |
1135 file_versions = self._package_versions(link_evaluator, file_locations) | |
1136 if file_versions: | |
1137 file_versions.sort(reverse=True) | |
1138 logger.debug( | |
1139 'Local files found: %s', | |
1140 ', '.join([ | |
1141 url_to_path(candidate.link.url) | |
1142 for candidate in file_versions | |
1143 ]) | |
1144 ) | |
1145 | |
1146 # This is an intentional priority ordering | |
1147 return file_versions + find_links_versions + page_versions | |
1148 | |
1149 def make_candidate_evaluator( | |
1150 self, | |
1151 project_name, # type: str | |
1152 specifier=None, # type: Optional[specifiers.BaseSpecifier] | |
1153 hashes=None, # type: Optional[Hashes] | |
1154 ): | |
1155 # type: (...) -> CandidateEvaluator | |
1156 """Create a CandidateEvaluator object to use. | |
1157 """ | |
1158 candidate_prefs = self._candidate_prefs | |
1159 return CandidateEvaluator.create( | |
1160 project_name=project_name, | |
1161 target_python=self._target_python, | |
1162 prefer_binary=candidate_prefs.prefer_binary, | |
1163 allow_all_prereleases=candidate_prefs.allow_all_prereleases, | |
1164 specifier=specifier, | |
1165 hashes=hashes, | |
1166 ) | |
1167 | |
1168 def find_candidates( | |
1169 self, | |
1170 project_name, # type: str | |
1171 specifier=None, # type: Optional[specifiers.BaseSpecifier] | |
1172 hashes=None, # type: Optional[Hashes] | |
1173 ): | |
1174 # type: (...) -> FoundCandidates | |
1175 """Find matches for the given project and specifier. | |
1176 | |
1177 :param specifier: An optional object implementing `filter` | |
1178 (e.g. `packaging.specifiers.SpecifierSet`) to filter applicable | |
1179 versions. | |
1180 | |
1181 :return: A `FoundCandidates` instance. | |
1182 """ | |
1183 candidates = self.find_all_candidates(project_name) | |
1184 candidate_evaluator = self.make_candidate_evaluator( | |
1185 project_name=project_name, | |
1186 specifier=specifier, | |
1187 hashes=hashes, | |
1188 ) | |
1189 return candidate_evaluator.make_found_candidates(candidates) | |
1190 | |
1191 def find_requirement(self, req, upgrade): | |
1192 # type: (InstallRequirement, bool) -> Optional[Link] | |
1193 """Try to find a Link matching req | |
1194 | |
1195 Expects req, an InstallRequirement and upgrade, a boolean | |
1196 Returns a Link if found, | |
1197 Raises DistributionNotFound or BestVersionAlreadyInstalled otherwise | |
1198 """ | |
1199 hashes = req.hashes(trust_internet=False) | |
1200 candidates = self.find_candidates( | |
1201 req.name, specifier=req.specifier, hashes=hashes, | |
1202 ) | |
1203 best_candidate = candidates.get_best() | |
1204 | |
1205 installed_version = None # type: Optional[_BaseVersion] | |
1206 if req.satisfied_by is not None: | |
1207 installed_version = parse_version(req.satisfied_by.version) | |
1208 | |
1209 def _format_versions(cand_iter): | |
1210 # This repeated parse_version and str() conversion is needed to | |
1211 # handle different vendoring sources from pip and pkg_resources. | |
1212 # If we stop using the pkg_resources provided specifier and start | |
1213 # using our own, we can drop the cast to str(). | |
1214 return ", ".join(sorted( | |
1215 {str(c.version) for c in cand_iter}, | |
1216 key=parse_version, | |
1217 )) or "none" | |
1218 | |
1219 if installed_version is None and best_candidate is None: | |
1220 logger.critical( | |
1221 'Could not find a version that satisfies the requirement %s ' | |
1222 '(from versions: %s)', | |
1223 req, | |
1224 _format_versions(candidates.iter_all()), | |
1225 ) | |
1226 | |
1227 raise DistributionNotFound( | |
1228 'No matching distribution found for %s' % req | |
1229 ) | |
1230 | |
1231 best_installed = False | |
1232 if installed_version and ( | |
1233 best_candidate is None or | |
1234 best_candidate.version <= installed_version): | |
1235 best_installed = True | |
1236 | |
1237 if not upgrade and installed_version is not None: | |
1238 if best_installed: | |
1239 logger.debug( | |
1240 'Existing installed version (%s) is most up-to-date and ' | |
1241 'satisfies requirement', | |
1242 installed_version, | |
1243 ) | |
1244 else: | |
1245 logger.debug( | |
1246 'Existing installed version (%s) satisfies requirement ' | |
1247 '(most up-to-date version is %s)', | |
1248 installed_version, | |
1249 best_candidate.version, | |
1250 ) | |
1251 return None | |
1252 | |
1253 if best_installed: | |
1254 # We have an existing version, and its the best version | |
1255 logger.debug( | |
1256 'Installed version (%s) is most up-to-date (past versions: ' | |
1257 '%s)', | |
1258 installed_version, | |
1259 _format_versions(candidates.iter_applicable()), | |
1260 ) | |
1261 raise BestVersionAlreadyInstalled | |
1262 | |
1263 logger.debug( | |
1264 'Using version %s (newest of versions: %s)', | |
1265 best_candidate.version, | |
1266 _format_versions(candidates.iter_applicable()), | |
1267 ) | |
1268 return best_candidate.link | |
1269 | |
1270 def _get_pages(self, locations, project_name): | |
1271 # type: (Iterable[Link], str) -> Iterable[HTMLPage] | |
1272 """ | |
1273 Yields (page, page_url) from the given locations, skipping | |
1274 locations that have errors. | |
1275 """ | |
1276 seen = set() # type: Set[Link] | |
1277 for location in locations: | |
1278 if location in seen: | |
1279 continue | |
1280 seen.add(location) | |
1281 | |
1282 page = _get_html_page(location, session=self.session) | |
1283 if page is None: | |
1284 continue | |
1285 | |
1286 yield page | |
1287 | |
1288 def _sort_links(self, links): | |
1289 # type: (Iterable[Link]) -> List[Link] | |
1290 """ | |
1291 Returns elements of links in order, non-egg links first, egg links | |
1292 second, while eliminating duplicates | |
1293 """ | |
1294 eggs, no_eggs = [], [] | |
1295 seen = set() # type: Set[Link] | |
1296 for link in links: | |
1297 if link not in seen: | |
1298 seen.add(link) | |
1299 if link.egg_fragment: | |
1300 eggs.append(link) | |
1301 else: | |
1302 no_eggs.append(link) | |
1303 return no_eggs + eggs | |
1304 | |
1305 def _log_skipped_link(self, link, reason): | |
1306 # type: (Link, Text) -> None | |
1307 if link not in self._logged_links: | |
1308 # Mark this as a unicode string to prevent "UnicodeEncodeError: | |
1309 # 'ascii' codec can't encode character" in Python 2 when | |
1310 # the reason contains non-ascii characters. | |
1311 # Also, put the link at the end so the reason is more visible | |
1312 # and because the link string is usually very long. | |
1313 logger.debug(u'Skipping link: %s: %s', reason, link) | |
1314 self._logged_links.add(link) | |
1315 | |
1316 def get_install_candidate(self, link_evaluator, link): | |
1317 # type: (LinkEvaluator, Link) -> Optional[InstallationCandidate] | |
1318 """ | |
1319 If the link is a candidate for install, convert it to an | |
1320 InstallationCandidate and return it. Otherwise, return None. | |
1321 """ | |
1322 is_candidate, result = link_evaluator.evaluate_link(link) | |
1323 if not is_candidate: | |
1324 if result: | |
1325 self._log_skipped_link(link, reason=result) | |
1326 return None | |
1327 | |
1328 return InstallationCandidate( | |
1329 project=link_evaluator.project_name, | |
1330 link=link, | |
1331 # Convert the Text result to str since InstallationCandidate | |
1332 # accepts str. | |
1333 version=str(result), | |
1334 ) | |
1335 | |
1336 def _package_versions(self, link_evaluator, links): | |
1337 # type: (LinkEvaluator, Iterable[Link]) -> List[InstallationCandidate] | |
1338 result = [] | |
1339 for link in self._sort_links(links): | |
1340 candidate = self.get_install_candidate(link_evaluator, link) | |
1341 if candidate is not None: | |
1342 result.append(candidate) | |
1343 return result | |
1344 | |
1345 | |
def _find_name_version_sep(fragment, canonical_name):
    # type: (str, str) -> int
    """Find the separator's index based on the package's canonical name.

    :param fragment: A <package>+<version> filename "fragment" (stem) or
        egg fragment.
    :param canonical_name: The package's canonical name.

    This function is needed since the canonicalized name does not necessarily
    have the same length as the egg info's name part. An example::

        >>> fragment = 'foo__bar-1.0'
        >>> canonical_name = 'foo-bar'
        >>> _find_name_version_sep(fragment, canonical_name)
        8

    :raises ValueError: if no dash in *fragment* splits it so that the
        part before the dash canonicalizes to *canonical_name*.
    """
    # Project name and version must be separated by one single dash. Walk
    # every dash position; the separator is the first one whose prefix
    # canonicalizes to the project's canonical name.
    sep = fragment.find("-")
    while sep != -1:
        if canonicalize_name(fragment[:sep]) == canonical_name:
            return sep
        sep = fragment.find("-", sep + 1)
    raise ValueError("{} does not match {}".format(fragment, canonical_name))
1371 | |
1372 | |
def _extract_version_from_fragment(fragment, canonical_name):
    # type: (str, str) -> Optional[str]
    """Parse the version string from a <package>+<version> filename
    "fragment" (stem) or egg fragment.

    :param fragment: The string to parse. E.g. foo-2.1
    :param canonical_name: The canonicalized name of the package this
        belongs to.
    :return: The version substring, or None when the fragment does not
        match the package name or carries no version at all.
    """
    try:
        sep_index = _find_name_version_sep(fragment, canonical_name)
    except ValueError:
        # The fragment is not for this package.
        return None
    version = fragment[sep_index + 1:]
    # An empty trailing part (e.g. "foo-") is treated as "no version".
    return version or None
1390 | |
1391 | |
1392 def _determine_base_url(document, page_url): | |
1393 """Determine the HTML document's base URL. | |
1394 | |
1395 This looks for a ``<base>`` tag in the HTML document. If present, its href | |
1396 attribute denotes the base URL of anchor tags in the document. If there is | |
1397 no such tag (or if it does not have a valid href attribute), the HTML | |
1398 file's URL is used as the base URL. | |
1399 | |
1400 :param document: An HTML document representation. The current | |
1401 implementation expects the result of ``html5lib.parse()``. | |
1402 :param page_url: The URL of the HTML document. | |
1403 """ | |
1404 for base in document.findall(".//base"): | |
1405 href = base.get("href") | |
1406 if href is not None: | |
1407 return href | |
1408 return page_url | |
1409 | |
1410 | |
1411 def _get_encoding_from_headers(headers): | |
1412 """Determine if we have any encoding information in our headers. | |
1413 """ | |
1414 if headers and "Content-Type" in headers: | |
1415 content_type, params = cgi.parse_header(headers["Content-Type"]) | |
1416 if "charset" in params: | |
1417 return params['charset'] | |
1418 return None | |
1419 | |
1420 | |
1421 def _clean_link(url): | |
1422 # type: (str) -> str | |
1423 """Makes sure a link is fully encoded. That is, if a ' ' shows up in | |
1424 the link, it will be rewritten to %20 (while not over-quoting | |
1425 % or other characters).""" | |
1426 # Split the URL into parts according to the general structure | |
1427 # `scheme://netloc/path;parameters?query#fragment`. Note that the | |
1428 # `netloc` can be empty and the URI will then refer to a local | |
1429 # filesystem path. | |
1430 result = urllib_parse.urlparse(url) | |
1431 # In both cases below we unquote prior to quoting to make sure | |
1432 # nothing is double quoted. | |
1433 if result.netloc == "": | |
1434 # On Windows the path part might contain a drive letter which | |
1435 # should not be quoted. On Linux where drive letters do not | |
1436 # exist, the colon should be quoted. We rely on urllib.request | |
1437 # to do the right thing here. | |
1438 path = urllib_request.pathname2url( | |
1439 urllib_request.url2pathname(result.path)) | |
1440 else: | |
1441 # In addition to the `/` character we protect `@` so that | |
1442 # revision strings in VCS URLs are properly parsed. | |
1443 path = urllib_parse.quote(urllib_parse.unquote(result.path), safe="/@") | |
1444 return urllib_parse.urlunparse(result._replace(path=path)) | |
1445 | |
1446 | |
def _create_link_from_element(
    anchor,  # type: HTMLElement
    page_url,  # type: str
    base_url,  # type: str
):
    # type: (...) -> Optional[Link]
    """
    Convert an anchor element in a simple repository page to a Link.

    Returns None when the anchor has no (or an empty) href attribute.
    """
    href = anchor.get("href")
    if not href:
        return None

    # Resolve relative hrefs against the page's <base> (or its own URL)
    # and normalize the quoting of the result.
    absolute_url = _clean_link(urllib_parse.urljoin(base_url, href))

    requires_python = anchor.get('data-requires-python')
    if requires_python:
        # The attribute value arrives HTML-escaped.
        requires_python = unescape(requires_python)
    else:
        requires_python = None

    yanked_reason = anchor.get('data-yanked')
    if yanked_reason:
        # This is a unicode string in Python 2 (and 3).
        yanked_reason = unescape(yanked_reason)

    return Link(
        absolute_url,
        comes_from=page_url,
        requires_python=requires_python,
        yanked_reason=yanked_reason,
    )
1477 | |
1478 | |
class HTMLPage(object):
    """Represents one page, along with its URL"""

    def __init__(self, content, url, headers=None):
        # type: (bytes, str, MutableMapping[str, str]) -> None
        self.content = content
        self.url = url
        self.headers = headers

    def __str__(self):
        # Never leak credentials that may be embedded in the page URL.
        return redact_password_from_url(self.url)

    def iter_links(self):
        # type: () -> Iterable[Link]
        """Yields all links in the page"""
        # Decode using the charset from the response headers, if any.
        parsed = html5lib.parse(
            self.content,
            transport_encoding=_get_encoding_from_headers(self.headers),
            namespaceHTMLElements=False,
        )
        base_url = _determine_base_url(parsed, self.url)
        candidates = (
            _create_link_from_element(
                anchor,
                page_url=self.url,
                base_url=base_url,
            )
            for anchor in parsed.findall(".//a")
        )
        # Anchors without an href are converted to None; drop them.
        for link in candidates:
            if link is not None:
                yield link