Comparison of env/lib/python3.7/site-packages/distlib/locators.py @ 5:9b1c78e6ba9c (draft, default, tip)
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
| field | value |
|---|---|
| author | shellac |
| date | Mon, 01 Jun 2020 08:59:25 -0400 |
| parents | 79f47841a781 |
| children | |

| 4:79f47841a781 | 5:9b1c78e6ba9c |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 # | |
| 3 # Copyright (C) 2012-2015 Vinay Sajip. | |
| 4 # Licensed to the Python Software Foundation under a contributor agreement. | |
| 5 # See LICENSE.txt and CONTRIBUTORS.txt. | |
| 6 # | |
| 7 | |
| 8 import gzip | |
| 9 from io import BytesIO | |
| 10 import json | |
| 11 import logging | |
| 12 import os | |
| 13 import posixpath | |
| 14 import re | |
| 15 try: | |
| 16 import threading | |
| 17 except ImportError: # pragma: no cover | |
| 18 import dummy_threading as threading | |
| 19 import zlib | |
| 20 | |
| 21 from . import DistlibException | |
| 22 from .compat import (urljoin, urlparse, urlunparse, url2pathname, pathname2url, | |
| 23 queue, quote, unescape, string_types, build_opener, | |
| 24 HTTPRedirectHandler as BaseRedirectHandler, text_type, | |
| 25 Request, HTTPError, URLError) | |
| 26 from .database import Distribution, DistributionPath, make_dist | |
| 27 from .metadata import Metadata, MetadataInvalidError | |
| 28 from .util import (cached_property, parse_credentials, ensure_slash, | |
| 29 split_filename, get_project_data, parse_requirement, | |
| 30 parse_name_and_version, ServerProxy, normalize_name) | |
| 31 from .version import get_scheme, UnsupportedVersionError | |
| 32 from .wheel import Wheel, is_compatible | |
| 33 | |
| 34 logger = logging.getLogger(__name__) | |
| 35 | |
| 36 HASHER_HASH = re.compile(r'^(\w+)=([a-f0-9]+)') | |
| 37 CHARSET = re.compile(r';\s*charset\s*=\s*(.*)\s*$', re.I) | |
| 38 HTML_CONTENT_TYPE = re.compile('text/html|application/x(ht)?ml') | |
| 39 DEFAULT_INDEX = 'https://pypi.org/pypi' | |
| 40 | |
| 41 def get_all_distribution_names(url=None): | |
| 42 """ | |
| 43 Return all distribution names known by an index. | |
| 44 :param url: The URL of the index. | |
| 45 :return: A list of all known distribution names. | |
| 46 """ | |
| 47 if url is None: | |
| 48 url = DEFAULT_INDEX | |
| 49 client = ServerProxy(url, timeout=3.0) | |
| 50 try: | |
| 51 return client.list_packages() | |
| 52 finally: | |
| 53 client('close')() | |
| 54 | |
| 55 class RedirectHandler(BaseRedirectHandler): | |
| 56 """ | |
| 57 A class to work around a bug in some Python 3.2.x releases. | |
| 58 """ | |
| 59 # There's a bug in the base version for some 3.2.x | |
| 60 # (e.g. 3.2.2 on Ubuntu Oneiric). If a Location header | |
| 61 # returns e.g. /abc, it bails because it says the scheme '' | |
| 62 # is bogus, when actually it should use the request's | |
| 63 # URL for the scheme. See Python issue #13696. | |
| 64 def http_error_302(self, req, fp, code, msg, headers): | |
| 65 # Some servers (incorrectly) return multiple Location headers | |
| 66 # (so probably same goes for URI). Use first header. | |
| 67 newurl = None | |
| 68 for key in ('location', 'uri'): | |
| 69 if key in headers: | |
| 70 newurl = headers[key] | |
| 71 break | |
| 72 if newurl is None: # pragma: no cover | |
| 73 return | |
| 74 urlparts = urlparse(newurl) | |
| 75 if urlparts.scheme == '': | |
| 76 newurl = urljoin(req.get_full_url(), newurl) | |
| 77 if hasattr(headers, 'replace_header'): | |
| 78 headers.replace_header(key, newurl) | |
| 79 else: | |
| 80 headers[key] = newurl | |
| 81 return BaseRedirectHandler.http_error_302(self, req, fp, code, msg, | |
| 82 headers) | |
| 83 | |
| 84 http_error_301 = http_error_303 = http_error_307 = http_error_302 | |
| 85 | |
| 86 class Locator(object): | |
| 87 """ | |
| 88 A base class for locators - things that locate distributions. | |
| 89 """ | |
| 90 source_extensions = ('.tar.gz', '.tar.bz2', '.tar', '.zip', '.tgz', '.tbz') | |
| 91 binary_extensions = ('.egg', '.exe', '.whl') | |
| 92 excluded_extensions = ('.pdf',) | |
| 93 | |
| 94 # A list of tags indicating which wheels you want to match. The default | |
| 95 # value of None matches against the tags compatible with the running | |
| 96 # Python. If you want to match other values, set wheel_tags on a locator | |
| 97 # instance to a list of tuples (pyver, abi, arch) which you want to match. | |
| 98 wheel_tags = None | |
| 99 | |
| 100 downloadable_extensions = source_extensions + ('.whl',) | |
| 101 | |
| 102 def __init__(self, scheme='default'): | |
| 103 """ | |
| 104 Initialise an instance. | |
| 105 :param scheme: Because locators look for most recent versions, they | |
| 106 need to know the version scheme to use. This specifies | |
| 107 the current PEP-recommended scheme - use ``'legacy'`` | |
| 108 if you need to support existing distributions on PyPI. | |
| 109 """ | |
| 110 self._cache = {} | |
| 111 self.scheme = scheme | |
| 112 # Because of bugs in some of the handlers on some of the platforms, | |
| 113 # we use our own opener rather than just using urlopen. | |
| 114 self.opener = build_opener(RedirectHandler()) | |
| 115 # If get_project() is called from locate(), the matcher instance | |
| 116 # is set from the requirement passed to locate(). See issue #18 for | |
| 117 # why this can be useful to know. | |
| 118 self.matcher = None | |
| 119 self.errors = queue.Queue() | |
| 120 | |
| 121 def get_errors(self): | |
| 122 """ | |
| 123 Return any errors which have occurred. | |
| 124 """ | |
| 125 result = [] | |
| 126 while not self.errors.empty(): # pragma: no cover | |
| 127 try: | |
| 128 e = self.errors.get(False) | |
| 129 result.append(e) | |
| 130 except queue.Empty: | |
| 131 continue | |
| 132 self.errors.task_done() | |
| 133 return result | |
| 134 | |
| 135 def clear_errors(self): | |
| 136 """ | |
| 137 Clear any errors which may have been logged. | |
| 138 """ | |
| 139 # Just get the errors and throw them away | |
| 140 self.get_errors() | |
| 141 | |
| 142 def clear_cache(self): | |
| 143 self._cache.clear() | |
| 144 | |
| 145 def _get_scheme(self): | |
| 146 return self._scheme | |
| 147 | |
| 148 def _set_scheme(self, value): | |
| 149 self._scheme = value | |
| 150 | |
| 151 scheme = property(_get_scheme, _set_scheme) | |
| 152 | |
| 153 def _get_project(self, name): | |
| 154 """ | |
| 155 For a given project, get a dictionary mapping available versions to Distribution | |
| 156 instances. | |
| 157 | |
| 158 This should be implemented in subclasses. | |
| 159 | |
| 160 If called from a locate() request, self.matcher will be set to a | |
| 161 matcher for the requirement to satisfy, otherwise it will be None. | |
| 162 """ | |
| 163 raise NotImplementedError('Please implement in the subclass') | |
| 164 | |
| 165 def get_distribution_names(self): | |
| 166 """ | |
| 167 Return all the distribution names known to this locator. | |
| 168 """ | |
| 169 raise NotImplementedError('Please implement in the subclass') | |
| 170 | |
| 171 def get_project(self, name): | |
| 172 """ | |
| 173 For a given project, get a dictionary mapping available versions to Distribution | |
| 174 instances. | |
| 175 | |
| 176 This calls _get_project to do all the work, and just implements a caching layer on top. | |
| 177 """ | |
| 178 if self._cache is None: # pragma: no cover | |
| 179 result = self._get_project(name) | |
| 180 elif name in self._cache: | |
| 181 result = self._cache[name] | |
| 182 else: | |
| 183 self.clear_errors() | |
| 184 result = self._get_project(name) | |
| 185 self._cache[name] = result | |
| 186 return result | |
| 187 | |
| 188 def score_url(self, url): | |
| 189 """ | |
| 190 Give an url a score which can be used to choose preferred URLs | |
| 191 for a given project release. | |
| 192 """ | |
| 193 t = urlparse(url) | |
| 194 basename = posixpath.basename(t.path) | |
| 195 compatible = True | |
| 196 is_wheel = basename.endswith('.whl') | |
| 197 is_downloadable = basename.endswith(self.downloadable_extensions) | |
| 198 if is_wheel: | |
| 199 compatible = is_compatible(Wheel(basename), self.wheel_tags) | |
| 200 return (t.scheme == 'https', 'pypi.org' in t.netloc, | |
| 201 is_downloadable, is_wheel, compatible, basename) | |
| 202 | |
| 203 def prefer_url(self, url1, url2): | |
| 204 """ | |
| 205 Choose one of two URLs where both are candidates for distribution | |
| 206 archives for the same version of a distribution (for example, | |
| 207 .tar.gz vs. zip). | |
| 208 | |
| 209 The current implementation favours https:// URLs over http://, archives | |
| 210 from PyPI over those from other locations, wheel compatibility (if a | |
| 211 wheel) and then the archive name. | |
| 212 """ | |
| 213 result = url2 | |
| 214 if url1: | |
| 215 s1 = self.score_url(url1) | |
| 216 s2 = self.score_url(url2) | |
| 217 if s1 > s2: | |
| 218 result = url1 | |
| 219 if result != url2: | |
| 220 logger.debug('Not replacing %r with %r', url1, url2) | |
| 221 else: | |
| 222 logger.debug('Replacing %r with %r', url1, url2) | |
| 223 return result | |
| 224 | |
| 225 def split_filename(self, filename, project_name): | |
| 226 """ | |
| 227 Attempt to split a filename into project name, version and Python version. | |
| 228 """ | |
| 229 return split_filename(filename, project_name) | |
| 230 | |
| 231 def convert_url_to_download_info(self, url, project_name): | |
| 232 """ | |
| 233 See if a URL is a candidate for a download URL for a project (the URL | |
| 234 has typically been scraped from an HTML page). | |
| 235 | |
| 236 If it is, a dictionary is returned with keys "name", "version", | |
| 237 "filename" and "url"; otherwise, None is returned. | |
| 238 """ | |
| 239 def same_project(name1, name2): | |
| 240 return normalize_name(name1) == normalize_name(name2) | |
| 241 | |
| 242 result = None | |
| 243 scheme, netloc, path, params, query, frag = urlparse(url) | |
| 244 if frag.lower().startswith('egg='): # pragma: no cover | |
| 245 logger.debug('%s: version hint in fragment: %r', | |
| 246 project_name, frag) | |
| 247 m = HASHER_HASH.match(frag) | |
| 248 if m: | |
| 249 algo, digest = m.groups() | |
| 250 else: | |
| 251 algo, digest = None, None | |
| 252 origpath = path | |
| 253 if path and path[-1] == '/': # pragma: no cover | |
| 254 path = path[:-1] | |
| 255 if path.endswith('.whl'): | |
| 256 try: | |
| 257 wheel = Wheel(path) | |
| 258 if not is_compatible(wheel, self.wheel_tags): | |
| 259 logger.debug('Wheel not compatible: %s', path) | |
| 260 else: | |
| 261 if project_name is None: | |
| 262 include = True | |
| 263 else: | |
| 264 include = same_project(wheel.name, project_name) | |
| 265 if include: | |
| 266 result = { | |
| 267 'name': wheel.name, | |
| 268 'version': wheel.version, | |
| 269 'filename': wheel.filename, | |
| 270 'url': urlunparse((scheme, netloc, origpath, | |
| 271 params, query, '')), | |
| 272 'python-version': ', '.join( | |
| 273 ['.'.join(list(v[2:])) for v in wheel.pyver]), | |
| 274 } | |
| 275 except Exception as e: # pragma: no cover | |
| 276 logger.warning('invalid path for wheel: %s', path) | |
| 277 elif not path.endswith(self.downloadable_extensions): # pragma: no cover | |
| 278 logger.debug('Not downloadable: %s', path) | |
| 279 else: # downloadable extension | |
| 280 path = filename = posixpath.basename(path) | |
| 281 for ext in self.downloadable_extensions: | |
| 282 if path.endswith(ext): | |
| 283 path = path[:-len(ext)] | |
| 284 t = self.split_filename(path, project_name) | |
| 285 if not t: # pragma: no cover | |
| 286 logger.debug('No match for project/version: %s', path) | |
| 287 else: | |
| 288 name, version, pyver = t | |
| 289 if not project_name or same_project(project_name, name): | |
| 290 result = { | |
| 291 'name': name, | |
| 292 'version': version, | |
| 293 'filename': filename, | |
| 294 'url': urlunparse((scheme, netloc, origpath, | |
| 295 params, query, '')), | |
| 296 #'packagetype': 'sdist', | |
| 297 } | |
| 298 if pyver: # pragma: no cover | |
| 299 result['python-version'] = pyver | |
| 300 break | |
| 301 if result and algo: | |
| 302 result['%s_digest' % algo] = digest | |
| 303 return result | |
| 304 | |
| 305 def _get_digest(self, info): | |
| 306 """ | |
| 307 Get a digest from a dictionary by looking at a "digests" dictionary | |
| 308 or keys of the form 'algo_digest'. | |
| 309 | |
| 310 Returns a 2-tuple (algo, digest) if found, else None. Currently | |
| 311 looks only for SHA256, then MD5. | |
| 312 """ | |
| 313 result = None | |
| 314 if 'digests' in info: | |
| 315 digests = info['digests'] | |
| 316 for algo in ('sha256', 'md5'): | |
| 317 if algo in digests: | |
| 318 result = (algo, digests[algo]) | |
| 319 break | |
| 320 if not result: | |
| 321 for algo in ('sha256', 'md5'): | |
| 322 key = '%s_digest' % algo | |
| 323 if key in info: | |
| 324 result = (algo, info[key]) | |
| 325 break | |
| 326 return result | |
| 327 | |
| 328 def _update_version_data(self, result, info): | |
| 329 """ | |
| 330 Update a result dictionary (the final result from _get_project) with a | |
| 331 dictionary for a specific version, which typically holds information | |
| 332 gleaned from a filename or URL for an archive for the distribution. | |
| 333 """ | |
| 334 name = info.pop('name') | |
| 335 version = info.pop('version') | |
| 336 if version in result: | |
| 337 dist = result[version] | |
| 338 md = dist.metadata | |
| 339 else: | |
| 340 dist = make_dist(name, version, scheme=self.scheme) | |
| 341 md = dist.metadata | |
| 342 dist.digest = digest = self._get_digest(info) | |
| 343 url = info['url'] | |
| 344 result['digests'][url] = digest | |
| 345 if md.source_url != info['url']: | |
| 346 md.source_url = self.prefer_url(md.source_url, url) | |
| 347 result['urls'].setdefault(version, set()).add(url) | |
| 348 dist.locator = self | |
| 349 result[version] = dist | |
| 350 | |
| 351 def locate(self, requirement, prereleases=False): | |
| 352 """ | |
| 353 Find the most recent distribution which matches the given | |
| 354 requirement. | |
| 355 | |
| 356 :param requirement: A requirement of the form 'foo (1.0)' or perhaps | |
| 357 'foo (>= 1.0, < 2.0, != 1.3)' | |
| 358 :param prereleases: If ``True``, allow pre-release versions | |
| 359 to be located. Otherwise, pre-release versions | |
| 360 are not returned. | |
| 361 :return: A :class:`Distribution` instance, or ``None`` if no such | |
| 362 distribution could be located. | |
| 363 """ | |
| 364 result = None | |
| 365 r = parse_requirement(requirement) | |
| 366 if r is None: # pragma: no cover | |
| 367 raise DistlibException('Not a valid requirement: %r' % requirement) | |
| 368 scheme = get_scheme(self.scheme) | |
| 369 self.matcher = matcher = scheme.matcher(r.requirement) | |
| 370 logger.debug('matcher: %s (%s)', matcher, type(matcher).__name__) | |
| 371 versions = self.get_project(r.name) | |
| 372 if len(versions) > 2: # urls and digests keys are present | |
| 373 # sometimes, versions are invalid | |
| 374 slist = [] | |
| 375 vcls = matcher.version_class | |
| 376 for k in versions: | |
| 377 if k in ('urls', 'digests'): | |
| 378 continue | |
| 379 try: | |
| 380 if not matcher.match(k): | |
| 381 logger.debug('%s did not match %r', matcher, k) | |
| 382 else: | |
| 383 if prereleases or not vcls(k).is_prerelease: | |
| 384 slist.append(k) | |
| 385 else: | |
| 386 logger.debug('skipping pre-release ' | |
| 387 'version %s of %s', k, matcher.name) | |
| 388 except Exception: # pragma: no cover | |
| 389 logger.warning('error matching %s with %r', matcher, k) | |
| 390 pass # slist.append(k) | |
| 391 if len(slist) > 1: | |
| 392 slist = sorted(slist, key=scheme.key) | |
| 393 if slist: | |
| 394 logger.debug('sorted list: %s', slist) | |
| 395 version = slist[-1] | |
| 396 result = versions[version] | |
| 397 if result: | |
| 398 if r.extras: | |
| 399 result.extras = r.extras | |
| 400 result.download_urls = versions.get('urls', {}).get(version, set()) | |
| 401 d = {} | |
| 402 sd = versions.get('digests', {}) | |
| 403 for url in result.download_urls: | |
| 404 if url in sd: # pragma: no cover | |
| 405 d[url] = sd[url] | |
| 406 result.digests = d | |
| 407 self.matcher = None | |
| 408 return result | |
| 409 | |
| 410 | |
| 411 class PyPIRPCLocator(Locator): | |
| 412 """ | |
| 413 This locator uses XML-RPC to locate distributions. It therefore | |
| 414 cannot be used with simple mirrors (that only mirror file content). | |
| 415 """ | |
| 416 def __init__(self, url, **kwargs): | |
| 417 """ | |
| 418 Initialise an instance. | |
| 419 | |
| 420 :param url: The URL to use for XML-RPC. | |
| 421 :param kwargs: Passed to the superclass constructor. | |
| 422 """ | |
| 423 super(PyPIRPCLocator, self).__init__(**kwargs) | |
| 424 self.base_url = url | |
| 425 self.client = ServerProxy(url, timeout=3.0) | |
| 426 | |
| 427 def get_distribution_names(self): | |
| 428 """ | |
| 429 Return all the distribution names known to this locator. | |
| 430 """ | |
| 431 return set(self.client.list_packages()) | |
| 432 | |
| 433 def _get_project(self, name): | |
| 434 result = {'urls': {}, 'digests': {}} | |
| 435 versions = self.client.package_releases(name, True) | |
| 436 for v in versions: | |
| 437 urls = self.client.release_urls(name, v) | |
| 438 data = self.client.release_data(name, v) | |
| 439 metadata = Metadata(scheme=self.scheme) | |
| 440 metadata.name = data['name'] | |
| 441 metadata.version = data['version'] | |
| 442 metadata.license = data.get('license') | |
| 443 metadata.keywords = data.get('keywords', []) | |
| 444 metadata.summary = data.get('summary') | |
| 445 dist = Distribution(metadata) | |
| 446 if urls: | |
| 447 info = urls[0] | |
| 448 metadata.source_url = info['url'] | |
| 449 dist.digest = self._get_digest(info) | |
| 450 dist.locator = self | |
| 451 result[v] = dist | |
| 452 for info in urls: | |
| 453 url = info['url'] | |
| 454 digest = self._get_digest(info) | |
| 455 result['urls'].setdefault(v, set()).add(url) | |
| 456 result['digests'][url] = digest | |
| 457 return result | |
| 458 | |
| 459 class PyPIJSONLocator(Locator): | |
| 460 """ | |
| 461 This locator uses PyPI's JSON interface. It's very limited in functionality | |
| 462 and probably not worth using. | |
| 463 """ | |
| 464 def __init__(self, url, **kwargs): | |
| 465 super(PyPIJSONLocator, self).__init__(**kwargs) | |
| 466 self.base_url = ensure_slash(url) | |
| 467 | |
| 468 def get_distribution_names(self): | |
| 469 """ | |
| 470 Return all the distribution names known to this locator. | |
| 471 """ | |
| 472 raise NotImplementedError('Not available from this locator') | |
| 473 | |
| 474 def _get_project(self, name): | |
| 475 result = {'urls': {}, 'digests': {}} | |
| 476 url = urljoin(self.base_url, '%s/json' % quote(name)) | |
| 477 try: | |
| 478 resp = self.opener.open(url) | |
| 479 data = resp.read().decode() # for now | |
| 480 d = json.loads(data) | |
| 481 md = Metadata(scheme=self.scheme) | |
| 482 data = d['info'] | |
| 483 md.name = data['name'] | |
| 484 md.version = data['version'] | |
| 485 md.license = data.get('license') | |
| 486 md.keywords = data.get('keywords', []) | |
| 487 md.summary = data.get('summary') | |
| 488 dist = Distribution(md) | |
| 489 dist.locator = self | |
| 490 urls = d['urls'] | |
| 491 result[md.version] = dist | |
| 492 for info in d['urls']: | |
| 493 url = info['url'] | |
| 494 dist.download_urls.add(url) | |
| 495 dist.digests[url] = self._get_digest(info) | |
| 496 result['urls'].setdefault(md.version, set()).add(url) | |
| 497 result['digests'][url] = self._get_digest(info) | |
| 498 # Now get other releases | |
| 499 for version, infos in d['releases'].items(): | |
| 500 if version == md.version: | |
| 501 continue # already done | |
| 502 omd = Metadata(scheme=self.scheme) | |
| 503 omd.name = md.name | |
| 504 omd.version = version | |
| 505 odist = Distribution(omd) | |
| 506 odist.locator = self | |
| 507 result[version] = odist | |
| 508 for info in infos: | |
| 509 url = info['url'] | |
| 510 odist.download_urls.add(url) | |
| 511 odist.digests[url] = self._get_digest(info) | |
| 512 result['urls'].setdefault(version, set()).add(url) | |
| 513 result['digests'][url] = self._get_digest(info) | |
| 514 # for info in urls: | |
| 515 # md.source_url = info['url'] | |
| 516 # dist.digest = self._get_digest(info) | |
| 517 # dist.locator = self | |
| 518 # for info in urls: | |
| 519 # url = info['url'] | |
| 520 # result['urls'].setdefault(md.version, set()).add(url) | |
| 521 # result['digests'][url] = self._get_digest(info) | |
| 522 except Exception as e: | |
| 523 self.errors.put(text_type(e)) | |
| 524 logger.exception('JSON fetch failed: %s', e) | |
| 525 return result | |
| 526 | |
| 527 | |
| 528 class Page(object): | |
| 529 """ | |
| 530 This class represents a scraped HTML page. | |
| 531 """ | |
| 532 # The following slightly hairy-looking regex just looks for the contents of | |
| 533 # an anchor link, which has an attribute "href" either immediately preceded | |
| 534 # or immediately followed by a "rel" attribute. The attribute values can be | |
| 535 # declared with double quotes, single quotes or no quotes - which leads to | |
| 536 # the length of the expression. | |
| 537 _href = re.compile(""" | |
| 538 (rel\\s*=\\s*(?:"(?P<rel1>[^"]*)"|'(?P<rel2>[^']*)'|(?P<rel3>[^>\\s\n]*))\\s+)? | |
| 539 href\\s*=\\s*(?:"(?P<url1>[^"]*)"|'(?P<url2>[^']*)'|(?P<url3>[^>\\s\n]*)) | |
| 540 (\\s+rel\\s*=\\s*(?:"(?P<rel4>[^"]*)"|'(?P<rel5>[^']*)'|(?P<rel6>[^>\\s\n]*)))? | |
| 541 """, re.I | re.S | re.X) | |
| 542 _base = re.compile(r"""<base\s+href\s*=\s*['"]?([^'">]+)""", re.I | re.S) | |
| 543 | |
| 544 def __init__(self, data, url): | |
| 545 """ | |
| 546 Initialise an instance with the Unicode page contents and the URL they | |
| 547 came from. | |
| 548 """ | |
| 549 self.data = data | |
| 550 self.base_url = self.url = url | |
| 551 m = self._base.search(self.data) | |
| 552 if m: | |
| 553 self.base_url = m.group(1) | |
| 554 | |
| 555 _clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I) | |
| 556 | |
| 557 @cached_property | |
| 558 def links(self): | |
| 559 """ | |
| 560 Return the URLs of all the links on a page together with information | |
| 561 about their "rel" attribute, for determining which ones to treat as | |
| 562 downloads and which ones to queue for further scraping. | |
| 563 """ | |
| 564 def clean(url): | |
| 565 "Tidy up an URL." | |
| 566 scheme, netloc, path, params, query, frag = urlparse(url) | |
| 567 return urlunparse((scheme, netloc, quote(path), | |
| 568 params, query, frag)) | |
| 569 | |
| 570 result = set() | |
| 571 for match in self._href.finditer(self.data): | |
| 572 d = match.groupdict('') | |
| 573 rel = (d['rel1'] or d['rel2'] or d['rel3'] or | |
| 574 d['rel4'] or d['rel5'] or d['rel6']) | |
| 575 url = d['url1'] or d['url2'] or d['url3'] | |
| 576 url = urljoin(self.base_url, url) | |
| 577 url = unescape(url) | |
| 578 url = self._clean_re.sub(lambda m: '%%%2x' % ord(m.group(0)), url) | |
| 579 result.add((url, rel)) | |
| 580 # We sort the result, hoping to bring the most recent versions | |
| 581 # to the front | |
| 582 result = sorted(result, key=lambda t: t[0], reverse=True) | |
| 583 return result | |
| 584 | |
| 585 | |
| 586 class SimpleScrapingLocator(Locator): | |
| 587 """ | |
| 588 A locator which scrapes HTML pages to locate downloads for a distribution. | |
| 589 This runs multiple threads to do the I/O; performance is at least as good | |
| 590 as pip's PackageFinder, which works in an analogous fashion. | |
| 591 """ | |
| 592 | |
| 593 # These are used to deal with various Content-Encoding schemes. | |
| 594 decoders = { | |
| 595 'deflate': zlib.decompress, | |
| 596 'gzip': lambda b: gzip.GzipFile(fileobj=BytesIO(b)).read(), | |
| 597 'none': lambda b: b, | |
| 598 } | |
| 599 | |
| 600 def __init__(self, url, timeout=None, num_workers=10, **kwargs): | |
| 601 """ | |
| 602 Initialise an instance. | |
| 603 :param url: The root URL to use for scraping. | |
| 604 :param timeout: The timeout, in seconds, to be applied to requests. | |
| 605 This defaults to ``None`` (no timeout specified). | |
| 606 :param num_workers: The number of worker threads you want to do I/O. | |
| 607 This defaults to 10. | |
| 608 :param kwargs: Passed to the superclass. | |
| 609 """ | |
| 610 super(SimpleScrapingLocator, self).__init__(**kwargs) | |
| 611 self.base_url = ensure_slash(url) | |
| 612 self.timeout = timeout | |
| 613 self._page_cache = {} | |
| 614 self._seen = set() | |
| 615 self._to_fetch = queue.Queue() | |
| 616 self._bad_hosts = set() | |
| 617 self.skip_externals = False | |
| 618 self.num_workers = num_workers | |
| 619 self._lock = threading.RLock() | |
| 620 # See issue #45: we need to be resilient when the locator is used | |
| 621 # in a thread, e.g. with concurrent.futures. We can't use self._lock | |
| 622 # as it is for coordinating our internal threads - the ones created | |
| 623 # in _prepare_threads. | |
| 624 self._gplock = threading.RLock() | |
| 625 self.platform_check = False # See issue #112 | |
| 626 | |
| 627 def _prepare_threads(self): | |
| 628 """ | |
| 629 Threads are created only when get_project is called, and terminate | |
| 630 before it returns. They are there primarily to parallelise I/O (i.e. | |
| 631 fetching web pages). | |
| 632 """ | |
| 633 self._threads = [] | |
| 634 for i in range(self.num_workers): | |
| 635 t = threading.Thread(target=self._fetch) | |
| 636 t.setDaemon(True) | |
| 637 t.start() | |
| 638 self._threads.append(t) | |
| 639 | |
| 640 def _wait_threads(self): | |
| 641 """ | |
| 642 Tell all the threads to terminate (by sending a sentinel value) and | |
| 643 wait for them to do so. | |
| 644 """ | |
| 645 # Note that you need two loops, since you can't say which | |
| 646 # thread will get each sentinel | |
| 647 for t in self._threads: | |
| 648 self._to_fetch.put(None) # sentinel | |
| 649 for t in self._threads: | |
| 650 t.join() | |
| 651 self._threads = [] | |
| 652 | |
| 653 def _get_project(self, name): | |
| 654 result = {'urls': {}, 'digests': {}} | |
| 655 with self._gplock: | |
| 656 self.result = result | |
| 657 self.project_name = name | |
| 658 url = urljoin(self.base_url, '%s/' % quote(name)) | |
| 659 self._seen.clear() | |
| 660 self._page_cache.clear() | |
| 661 self._prepare_threads() | |
| 662 try: | |
| 663 logger.debug('Queueing %s', url) | |
| 664 self._to_fetch.put(url) | |
| 665 self._to_fetch.join() | |
| 666 finally: | |
| 667 self._wait_threads() | |
| 668 del self.result | |
| 669 return result | |
| 670 | |
| 671 platform_dependent = re.compile(r'\b(linux_(i\d86|x86_64|arm\w+)|' | |
| 672 r'win(32|_amd64)|macosx_?\d+)\b', re.I) | |
| 673 | |
| 674 def _is_platform_dependent(self, url): | |
| 675 """ | |
| 676 Does an URL refer to a platform-specific download? | |
| 677 """ | |
| 678 return self.platform_dependent.search(url) | |
| 679 | |
| 680 def _process_download(self, url): | |
| 681 """ | |
| 682 See if an URL is a suitable download for a project. | |
| 683 | |
| 684 If it is, register information in the result dictionary (for | |
| 685 _get_project) about the specific version it's for. | |
| 686 | |
| 687 Note that the return value isn't actually used other than as a boolean | |
| 688 value. | |
| 689 """ | |
| 690 if self.platform_check and self._is_platform_dependent(url): | |
| 691 info = None | |
| 692 else: | |
| 693 info = self.convert_url_to_download_info(url, self.project_name) | |
| 694 logger.debug('process_download: %s -> %s', url, info) | |
| 695 if info: | |
| 696 with self._lock: # needed because self.result is shared | |
| 697 self._update_version_data(self.result, info) | |
| 698 return info | |
| 699 | |
| 700 def _should_queue(self, link, referrer, rel): | |
| 701 """ | |
| 702 Determine whether a link URL from a referring page and with a | |
| 703 particular "rel" attribute should be queued for scraping. | |
| 704 """ | |
| 705 scheme, netloc, path, _, _, _ = urlparse(link) | |
| 706 if path.endswith(self.source_extensions + self.binary_extensions + | |
| 707 self.excluded_extensions): | |
| 708 result = False | |
| 709 elif self.skip_externals and not link.startswith(self.base_url): | |
| 710 result = False | |
| 711 elif not referrer.startswith(self.base_url): | |
| 712 result = False | |
| 713 elif rel not in ('homepage', 'download'): | |
| 714 result = False | |
| 715 elif scheme not in ('http', 'https', 'ftp'): | |
| 716 result = False | |
| 717 elif self._is_platform_dependent(link): | |
| 718 result = False | |
| 719 else: | |
| 720 host = netloc.split(':', 1)[0] | |
| 721 if host.lower() == 'localhost': | |
| 722 result = False | |
| 723 else: | |
| 724 result = True | |
| 725 logger.debug('should_queue: %s (%s) from %s -> %s', link, rel, | |
| 726 referrer, result) | |
| 727 return result | |
| 728 | |
| 729 def _fetch(self): | |
| 730 """ | |
| 731 Get a URL to fetch from the work queue, get the HTML page, examine its | |
| 732 links for download candidates and candidates for further scraping. | |
| 733 | |
| 734 This is a handy method to run in a thread. | |
| 735 """ | |
| 736 while True: | |
| 737 url = self._to_fetch.get() | |
| 738 try: | |
| 739 if url: | |
| 740 page = self.get_page(url) | |
| 741 if page is None: # e.g. after an error | |
| 742 continue | |
| 743 for link, rel in page.links: | |
| 744 if link not in self._seen: | |
| 745 try: | |
| 746 self._seen.add(link) | |
| 747 if (not self._process_download(link) and | |
| 748 self._should_queue(link, url, rel)): | |
| 749 logger.debug('Queueing %s from %s', link, url) | |
| 750 self._to_fetch.put(link) | |
| 751 except MetadataInvalidError: # e.g. invalid versions | |
| 752 pass | |
| 753 except Exception as e: # pragma: no cover | |
| 754 self.errors.put(text_type(e)) | |
| 755 finally: | |
| 756 # always do this, to avoid hangs :-) | |
| 757 self._to_fetch.task_done() | |
| 758 if not url: | |
| 759 #logger.debug('Sentinel seen, quitting.') | |
| 760 break | |
| 761 | |
| 762 def get_page(self, url): | |
| 763 """ | |
| 764 Get the HTML for an URL, possibly from an in-memory cache. | |
| 765 | |
| 766 XXX TODO Note: this cache is never actually cleared. It's assumed that | |
| 767 the data won't get stale over the lifetime of a locator instance (not | |
| 768 necessarily true for the default_locator). | |
| 769 """ | |
| 770 # http://peak.telecommunity.com/DevCenter/EasyInstall#package-index-api | |
| 771 scheme, netloc, path, _, _, _ = urlparse(url) | |
| 772 if scheme == 'file' and os.path.isdir(url2pathname(path)): | |
| 773 url = urljoin(ensure_slash(url), 'index.html') | |
| 774 | |
| 775 if url in self._page_cache: | |
| 776 result = self._page_cache[url] | |
| 777 logger.debug('Returning %s from cache: %s', url, result) | |
| 778 else: | |
| 779 host = netloc.split(':', 1)[0] | |
| 780 result = None | |
| 781 if host in self._bad_hosts: | |
| 782 logger.debug('Skipping %s due to bad host %s', url, host) | |
| 783 else: | |
| 784 req = Request(url, headers={'Accept-encoding': 'identity'}) | |
| 785 try: | |
| 786 logger.debug('Fetching %s', url) | |
| 787 resp = self.opener.open(req, timeout=self.timeout) | |
| 788 logger.debug('Fetched %s', url) | |
| 789 headers = resp.info() | |
| 790 content_type = headers.get('Content-Type', '') | |
| 791 if HTML_CONTENT_TYPE.match(content_type): | |
| 792 final_url = resp.geturl() | |
| 793 data = resp.read() | |
| 794 encoding = headers.get('Content-Encoding') | |
| 795 if encoding: | |
| 796 decoder = self.decoders[encoding] # fail if not found | |
| 797 data = decoder(data) | |
| 798 encoding = 'utf-8' | |
| 799 m = CHARSET.search(content_type) | |
| 800 if m: | |
| 801 encoding = m.group(1) | |
| 802 try: | |
| 803 data = data.decode(encoding) | |
| 804 except UnicodeError: # pragma: no cover | |
| 805 data = data.decode('latin-1') # fallback | |
| 806 result = Page(data, final_url) | |
| 807 self._page_cache[final_url] = result | |
| 808 except HTTPError as e: | |
| 809 if e.code != 404: | |
| 810 logger.exception('Fetch failed: %s: %s', url, e) | |
| 811 except URLError as e: # pragma: no cover | |
| 812 logger.exception('Fetch failed: %s: %s', url, e) | |
| 813 with self._lock: | |
| 814 self._bad_hosts.add(host) | |
| 815 except Exception as e: # pragma: no cover | |
| 816 logger.exception('Fetch failed: %s: %s', url, e) | |
| 817 finally: | |
| 818 self._page_cache[url] = result # even if None (failure) | |
| 819 return result | |
| 820 | |
| 821 _distname_re = re.compile('<a href=[^>]*>([^<]+)<') | |
| 822 | |
| 823 def get_distribution_names(self): | |
| 824 """ | |
| 825 Return all the distribution names known to this locator. | |
| 826 """ | |
| 827 result = set() | |
| 828 page = self.get_page(self.base_url) | |
| 829 if not page: | |
| 830 raise DistlibException('Unable to get %s' % self.base_url) | |
| 831 for match in self._distname_re.finditer(page.data): | |
| 832 result.add(match.group(1)) | |
| 833 return result | |
| 834 | |
| 835 class DirectoryLocator(Locator): | |
| 836 """ | |
| 837 This class locates distributions in a directory tree. | |
| 838 """ | |
| 839 | |
| 840 def __init__(self, path, **kwargs): | |
| 841 """ | |
| 842 Initialise an instance. | |
| 843 :param path: The root of the directory tree to search. | |
| 844 :param kwargs: Passed to the superclass constructor, | |
| 845 except for: | |
| 846 * recursive - if True (the default), subdirectories are | |
| 847 recursed into. If False, only the top-level directory | |
| 848 is searched, | |
| 849 """ | |
| 850 self.recursive = kwargs.pop('recursive', True) | |
| 851 super(DirectoryLocator, self).__init__(**kwargs) | |
| 852 path = os.path.abspath(path) | |
| 853 if not os.path.isdir(path): # pragma: no cover | |
| 854 raise DistlibException('Not a directory: %r' % path) | |
| 855 self.base_dir = path | |
| 856 | |
| 857 def should_include(self, filename, parent): | |
| 858 """ | |
| 859 Should a filename be considered as a candidate for a distribution | |
| 860 archive? As well as the filename, the directory which contains it | |
| 861 is provided, though not used by the current implementation. | |
| 862 """ | |
| 863 return filename.endswith(self.downloadable_extensions) | |
| 864 | |
| 865 def _get_project(self, name): | |
| 866 result = {'urls': {}, 'digests': {}} | |
| 867 for root, dirs, files in os.walk(self.base_dir): | |
| 868 for fn in files: | |
| 869 if self.should_include(fn, root): | |
| 870 fn = os.path.join(root, fn) | |
| 871 url = urlunparse(('file', '', | |
| 872 pathname2url(os.path.abspath(fn)), | |
| 873 '', '', '')) | |
| 874 info = self.convert_url_to_download_info(url, name) | |
| 875 if info: | |
| 876 self._update_version_data(result, info) | |
| 877 if not self.recursive: | |
| 878 break | |
| 879 return result | |
| 880 | |
| 881 def get_distribution_names(self): | |
| 882 """ | |
| 883 Return all the distribution names known to this locator. | |
| 884 """ | |
| 885 result = set() | |
| 886 for root, dirs, files in os.walk(self.base_dir): | |
| 887 for fn in files: | |
| 888 if self.should_include(fn, root): | |
| 889 fn = os.path.join(root, fn) | |
| 890 url = urlunparse(('file', '', | |
| 891 pathname2url(os.path.abspath(fn)), | |
| 892 '', '', '')) | |
| 893 info = self.convert_url_to_download_info(url, None) | |
| 894 if info: | |
| 895 result.add(info['name']) | |
| 896 if not self.recursive: | |
| 897 break | |
| 898 return result | |
| 899 | |
| 900 class JSONLocator(Locator): | |
| 901 """ | |
| 902 This locator uses special extended metadata (not available on PyPI) and is | |
| 903 the basis of performant dependency resolution in distlib. Other locators | |
| 904 require archive downloads before dependencies can be determined! As you | |
| 905 might imagine, that can be slow. | |
| 906 """ | |
| 907 def get_distribution_names(self): | |
| 908 """ | |
| 909 Return all the distribution names known to this locator. | |
| 910 """ | |
| 911 raise NotImplementedError('Not available from this locator') | |
| 912 | |
| 913 def _get_project(self, name): | |
| 914 result = {'urls': {}, 'digests': {}} | |
| 915 data = get_project_data(name) | |
| 916 if data: | |
| 917 for info in data.get('files', []): | |
| 918 if info['ptype'] != 'sdist' or info['pyversion'] != 'source': | |
| 919 continue | |
| 920 # We don't store summary in project metadata as it makes | |
| 921 # the data bigger for no benefit during dependency | |
| 922 # resolution | |
| 923 dist = make_dist(data['name'], info['version'], | |
| 924 summary=data.get('summary', | |
| 925 'Placeholder for summary'), | |
| 926 scheme=self.scheme) | |
| 927 md = dist.metadata | |
| 928 md.source_url = info['url'] | |
| 929 # TODO SHA256 digest | |
| 930 if 'digest' in info and info['digest']: | |
| 931 dist.digest = ('md5', info['digest']) | |
| 932 md.dependencies = info.get('requirements', {}) | |
| 933 dist.exports = info.get('exports', {}) | |
| 934 result[dist.version] = dist | |
| 935 result['urls'].setdefault(dist.version, set()).add(info['url']) | |
| 936 return result | |
| 937 | |
| 938 class DistPathLocator(Locator): | |
| 939 """ | |
| 940 This locator finds installed distributions in a path. It can be useful for | |
| 941 adding to an :class:`AggregatingLocator`. | |
| 942 """ | |
| 943 def __init__(self, distpath, **kwargs): | |
| 944 """ | |
| 945 Initialise an instance. | |
| 946 | |
| 947 :param distpath: A :class:`DistributionPath` instance to search. | |
| 948 """ | |
| 949 super(DistPathLocator, self).__init__(**kwargs) | |
| 950 assert isinstance(distpath, DistributionPath) | |
| 951 self.distpath = distpath | |
| 952 | |
| 953 def _get_project(self, name): | |
| 954 dist = self.distpath.get_distribution(name) | |
| 955 if dist is None: | |
| 956 result = {'urls': {}, 'digests': {}} | |
| 957 else: | |
| 958 result = { | |
| 959 dist.version: dist, | |
| 960 'urls': {dist.version: set([dist.source_url])}, | |
| 961 'digests': {dist.version: set([None])} | |
| 962 } | |
| 963 return result | |
| 964 | |
| 965 | |
| 966 class AggregatingLocator(Locator): | |
| 967 """ | |
| 968 This class allows you to chain and/or merge a list of locators. | |
| 969 """ | |
| 970 def __init__(self, *locators, **kwargs): | |
| 971 """ | |
| 972 Initialise an instance. | |
| 973 | |
| 974 :param locators: The list of locators to search. | |
| 975 :param kwargs: Passed to the superclass constructor, | |
| 976 except for: | |
| 977 * merge - if False (the default), the first successful | |
| 978 search from any of the locators is returned. If True, | |
| 979 the results from all locators are merged (this can be | |
| 980 slow). | |
| 981 """ | |
| 982 self.merge = kwargs.pop('merge', False) | |
| 983 self.locators = locators | |
| 984 super(AggregatingLocator, self).__init__(**kwargs) | |
| 985 | |
| 986 def clear_cache(self): | |
| 987 super(AggregatingLocator, self).clear_cache() | |
| 988 for locator in self.locators: | |
| 989 locator.clear_cache() | |
| 990 | |
| 991 def _set_scheme(self, value): | |
| 992 self._scheme = value | |
| 993 for locator in self.locators: | |
| 994 locator.scheme = value | |
| 995 | |
| 996 scheme = property(Locator.scheme.fget, _set_scheme) | |
| 997 | |
| 998 def _get_project(self, name): | |
| 999 result = {} | |
| 1000 for locator in self.locators: | |
| 1001 d = locator.get_project(name) | |
| 1002 if d: | |
| 1003 if self.merge: | |
| 1004 files = result.get('urls', {}) | |
| 1005 digests = result.get('digests', {}) | |
| 1006 # next line could overwrite result['urls'], result['digests'] | |
| 1007 result.update(d) | |
| 1008 df = result.get('urls') | |
| 1009 if files and df: | |
| 1010 for k, v in files.items(): | |
| 1011 if k in df: | |
| 1012 df[k] |= v | |
| 1013 else: | |
| 1014 df[k] = v | |
| 1015 dd = result.get('digests') | |
| 1016 if digests and dd: | |
| 1017 dd.update(digests) | |
| 1018 else: | |
| 1019 # See issue #18. If any dists are found and we're looking | |
| 1020 # for specific constraints, we only return something if | |
| 1021 # a match is found. For example, if a DirectoryLocator | |
| 1022 # returns just foo (1.0) while we're looking for | |
| 1023 # foo (>= 2.0), we'll pretend there was nothing there so | |
| 1024 # that subsequent locators can be queried. Otherwise we | |
| 1025 # would just return foo (1.0) which would then lead to a | |
| 1026 # failure to find foo (>= 2.0), because other locators | |
| 1027 # weren't searched. Note that this only matters when | |
| 1028 # merge=False. | |
| 1029 if self.matcher is None: | |
| 1030 found = True | |
| 1031 else: | |
| 1032 found = False | |
| 1033 for k in d: | |
| 1034 if self.matcher.match(k): | |
| 1035 found = True | |
| 1036 break | |
| 1037 if found: | |
| 1038 result = d | |
| 1039 break | |
| 1040 return result | |
| 1041 | |
| 1042 def get_distribution_names(self): | |
| 1043 """ | |
| 1044 Return all the distribution names known to this locator. | |
| 1045 """ | |
| 1046 result = set() | |
| 1047 for locator in self.locators: | |
| 1048 try: | |
| 1049 result |= locator.get_distribution_names() | |
| 1050 except NotImplementedError: | |
| 1051 pass | |
| 1052 return result | |
| 1053 | |
| 1054 | |
| 1055 # We use a legacy scheme simply because most of the dists on PyPI use legacy | |
| 1056 # versions which don't conform to PEP 426 / PEP 440. | |
| 1057 default_locator = AggregatingLocator( | |
| 1058 JSONLocator(), | |
| 1059 SimpleScrapingLocator('https://pypi.org/simple/', | |
| 1060 timeout=3.0), | |
| 1061 scheme='legacy') | |
| 1062 | |
| 1063 locate = default_locator.locate | |
| 1064 | |
| 1065 NAME_VERSION_RE = re.compile(r'(?P<name>[\w-]+)\s*' | |
| 1066 r'\(\s*(==\s*)?(?P<ver>[^)]+)\)$') | |
| 1067 | |
| 1068 class DependencyFinder(object): | |
| 1069 """ | |
| 1070 Locate dependencies for distributions. | |
| 1071 """ | |
| 1072 | |
| 1073 def __init__(self, locator=None): | |
| 1074 """ | |
| 1075 Initialise an instance, using the specified locator | |
| 1076 to locate distributions. | |
| 1077 """ | |
| 1078 self.locator = locator or default_locator | |
| 1079 self.scheme = get_scheme(self.locator.scheme) | |
| 1080 | |
| 1081 def add_distribution(self, dist): | |
| 1082 """ | |
| 1083 Add a distribution to the finder. This will update internal information | |
| 1084 about who provides what. | |
| 1085 :param dist: The distribution to add. | |
| 1086 """ | |
| 1087 logger.debug('adding distribution %s', dist) | |
| 1088 name = dist.key | |
| 1089 self.dists_by_name[name] = dist | |
| 1090 self.dists[(name, dist.version)] = dist | |
| 1091 for p in dist.provides: | |
| 1092 name, version = parse_name_and_version(p) | |
| 1093 logger.debug('Add to provided: %s, %s, %s', name, version, dist) | |
| 1094 self.provided.setdefault(name, set()).add((version, dist)) | |
| 1095 | |
| 1096 def remove_distribution(self, dist): | |
| 1097 """ | |
| 1098 Remove a distribution from the finder. This will update internal | |
| 1099 information about who provides what. | |
| 1100 :param dist: The distribution to remove. | |
| 1101 """ | |
| 1102 logger.debug('removing distribution %s', dist) | |
| 1103 name = dist.key | |
| 1104 del self.dists_by_name[name] | |
| 1105 del self.dists[(name, dist.version)] | |
| 1106 for p in dist.provides: | |
| 1107 name, version = parse_name_and_version(p) | |
| 1108 logger.debug('Remove from provided: %s, %s, %s', name, version, dist) | |
| 1109 s = self.provided[name] | |
| 1110 s.remove((version, dist)) | |
| 1111 if not s: | |
| 1112 del self.provided[name] | |
| 1113 | |
| 1114 def get_matcher(self, reqt): | |
| 1115 """ | |
| 1116 Get a version matcher for a requirement. | |
| 1117 :param reqt: The requirement | |
| 1118 :type reqt: str | |
| 1119 :return: A version matcher (an instance of | |
| 1120 :class:`distlib.version.Matcher`). | |
| 1121 """ | |
| 1122 try: | |
| 1123 matcher = self.scheme.matcher(reqt) | |
| 1124 except UnsupportedVersionError: # pragma: no cover | |
| 1125 # XXX compat-mode if cannot read the version | |
| 1126 name = reqt.split()[0] | |
| 1127 matcher = self.scheme.matcher(name) | |
| 1128 return matcher | |
| 1129 | |
| 1130 def find_providers(self, reqt): | |
| 1131 """ | |
| 1132 Find the distributions which can fulfill a requirement. | |
| 1133 | |
| 1134 :param reqt: The requirement. | |
| 1135 :type reqt: str | |
| 1136 :return: A set of distribution which can fulfill the requirement. | |
| 1137 """ | |
| 1138 matcher = self.get_matcher(reqt) | |
| 1139 name = matcher.key # case-insensitive | |
| 1140 result = set() | |
| 1141 provided = self.provided | |
| 1142 if name in provided: | |
| 1143 for version, provider in provided[name]: | |
| 1144 try: | |
| 1145 match = matcher.match(version) | |
| 1146 except UnsupportedVersionError: | |
| 1147 match = False | |
| 1148 | |
| 1149 if match: | |
| 1150 result.add(provider) | |
| 1151 break | |
| 1152 return result | |
| 1153 | |
| 1154 def try_to_replace(self, provider, other, problems): | |
| 1155 """ | |
| 1156 Attempt to replace one provider with another. This is typically used | |
| 1157 when resolving dependencies from multiple sources, e.g. A requires | |
| 1158 (B >= 1.0) while C requires (B >= 1.1). | |
| 1159 | |
| 1160 For successful replacement, ``provider`` must meet all the requirements | |
| 1161 which ``other`` fulfills. | |
| 1162 | |
| 1163 :param provider: The provider we are trying to replace with. | |
| 1164 :param other: The provider we're trying to replace. | |
| 1165 :param problems: If False is returned, this will contain what | |
| 1166 problems prevented replacement. This is currently | |
| 1167 a tuple of the literal string 'cantreplace', | |
| 1168 ``provider``, ``other`` and the set of requirements | |
| 1169 that ``provider`` couldn't fulfill. | |
| 1170 :return: True if we can replace ``other`` with ``provider``, else | |
| 1171 False. | |
| 1172 """ | |
| 1173 rlist = self.reqts[other] | |
| 1174 unmatched = set() | |
| 1175 for s in rlist: | |
| 1176 matcher = self.get_matcher(s) | |
| 1177 if not matcher.match(provider.version): | |
| 1178 unmatched.add(s) | |
| 1179 if unmatched: | |
| 1180 # can't replace other with provider | |
| 1181 problems.add(('cantreplace', provider, other, | |
| 1182 frozenset(unmatched))) | |
| 1183 result = False | |
| 1184 else: | |
| 1185 # can replace other with provider | |
| 1186 self.remove_distribution(other) | |
| 1187 del self.reqts[other] | |
| 1188 for s in rlist: | |
| 1189 self.reqts.setdefault(provider, set()).add(s) | |
| 1190 self.add_distribution(provider) | |
| 1191 result = True | |
| 1192 return result | |
| 1193 | |
| 1194 def find(self, requirement, meta_extras=None, prereleases=False): | |
| 1195 """ | |
| 1196 Find a distribution and all distributions it depends on. | |
| 1197 | |
| 1198 :param requirement: The requirement specifying the distribution to | |
| 1199 find, or a Distribution instance. | |
| 1200 :param meta_extras: A list of meta extras such as :test:, :build: and | |
| 1201 so on. | |
| 1202 :param prereleases: If ``True``, allow pre-release versions to be | |
| 1203 returned - otherwise, don't return prereleases | |
| 1204 unless they're all that's available. | |
| 1205 | |
| 1206 Return a set of :class:`Distribution` instances and a set of | |
| 1207 problems. | |
| 1208 | |
| 1209 The distributions returned should be such that they have the | |
| 1210 :attr:`required` attribute set to ``True`` if they were | |
| 1211 from the ``requirement`` passed to ``find()``, and they have the | |
| 1212 :attr:`build_time_dependency` attribute set to ``True`` unless they | |
| 1213 are post-installation dependencies of the ``requirement``. | |
| 1214 | |
| 1215 The problems should be a tuple consisting of the string | |
| 1216 ``'unsatisfied'`` and the requirement which couldn't be satisfied | |
| 1217 by any distribution known to the locator. | |
| 1218 """ | |
| 1219 | |
| 1220 self.provided = {} | |
| 1221 self.dists = {} | |
| 1222 self.dists_by_name = {} | |
| 1223 self.reqts = {} | |
| 1224 | |
| 1225 meta_extras = set(meta_extras or []) | |
| 1226 if ':*:' in meta_extras: | |
| 1227 meta_extras.remove(':*:') | |
| 1228 # :meta: and :run: are implicitly included | |
| 1229 meta_extras |= set([':test:', ':build:', ':dev:']) | |
| 1230 | |
| 1231 if isinstance(requirement, Distribution): | |
| 1232 dist = odist = requirement | |
| 1233 logger.debug('passed %s as requirement', odist) | |
| 1234 else: | |
| 1235 dist = odist = self.locator.locate(requirement, | |
| 1236 prereleases=prereleases) | |
| 1237 if dist is None: | |
| 1238 raise DistlibException('Unable to locate %r' % requirement) | |
| 1239 logger.debug('located %s', odist) | |
| 1240 dist.requested = True | |
| 1241 problems = set() | |
| 1242 todo = set([dist]) | |
| 1243 install_dists = set([odist]) | |
| 1244 while todo: | |
| 1245 dist = todo.pop() | |
| 1246 name = dist.key # case-insensitive | |
| 1247 if name not in self.dists_by_name: | |
| 1248 self.add_distribution(dist) | |
| 1249 else: | |
| 1250 #import pdb; pdb.set_trace() | |
| 1251 other = self.dists_by_name[name] | |
| 1252 if other != dist: | |
| 1253 self.try_to_replace(dist, other, problems) | |
| 1254 | |
| 1255 ireqts = dist.run_requires | dist.meta_requires | |
| 1256 sreqts = dist.build_requires | |
| 1257 ereqts = set() | |
| 1258 if meta_extras and dist in install_dists: | |
| 1259 for key in ('test', 'build', 'dev'): | |
| 1260 e = ':%s:' % key | |
| 1261 if e in meta_extras: | |
| 1262 ereqts |= getattr(dist, '%s_requires' % key) | |
| 1263 all_reqts = ireqts | sreqts | ereqts | |
| 1264 for r in all_reqts: | |
| 1265 providers = self.find_providers(r) | |
| 1266 if not providers: | |
| 1267 logger.debug('No providers found for %r', r) | |
| 1268 provider = self.locator.locate(r, prereleases=prereleases) | |
| 1269 # If no provider is found and we didn't consider | |
| 1270 # prereleases, consider them now. | |
| 1271 if provider is None and not prereleases: | |
| 1272 provider = self.locator.locate(r, prereleases=True) | |
| 1273 if provider is None: | |
| 1274 logger.debug('Cannot satisfy %r', r) | |
| 1275 problems.add(('unsatisfied', r)) | |
| 1276 else: | |
| 1277 n, v = provider.key, provider.version | |
| 1278 if (n, v) not in self.dists: | |
| 1279 todo.add(provider) | |
| 1280 providers.add(provider) | |
| 1281 if r in ireqts and dist in install_dists: | |
| 1282 install_dists.add(provider) | |
| 1283 logger.debug('Adding %s to install_dists', | |
| 1284 provider.name_and_version) | |
| 1285 for p in providers: | |
| 1286 name = p.key | |
| 1287 if name not in self.dists_by_name: | |
| 1288 self.reqts.setdefault(p, set()).add(r) | |
| 1289 else: | |
| 1290 other = self.dists_by_name[name] | |
| 1291 if other != p: | |
| 1292 # see if other can be replaced by p | |
| 1293 self.try_to_replace(p, other, problems) | |
| 1294 | |
| 1295 dists = set(self.dists.values()) | |
| 1296 for dist in dists: | |
| 1297 dist.build_time_dependency = dist not in install_dists | |
| 1298 if dist.build_time_dependency: | |
| 1299 logger.debug('%s is a build-time dependency only.', | |
| 1300 dist.name_and_version) | |
| 1301 logger.debug('find done for %s', odist) | |
| 1302 return dists, problems |
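
For orientation, here is a minimal usage sketch of the two main entry points defined in this file, `locate()` and `DependencyFinder`. It is not part of the repository file above; it assumes `distlib` is importable and the default PyPI index is reachable, and the requirement string `'requests (>= 2.0)'` is illustrative only.

```python
# A minimal sketch, not part of the file listed above. Assumes distlib is
# importable and PyPI is reachable; 'requests (>= 2.0)' is an illustrative
# requirement, not one taken from the repository.
from distlib.locators import DependencyFinder, default_locator, locate

# Find the newest release matching a requirement string (Locator.locate).
dist = locate('requests (>= 2.0)')
if dist is not None:
    print(dist.name_and_version, dist.source_url)

# Resolve the requirement and everything it depends on (DependencyFinder.find).
finder = DependencyFinder(default_locator)
dists, problems = finder.find('requests (>= 2.0)')
for name_and_version in sorted(d.name_and_version for d in dists):
    print(name_and_version)
for problem in problems:
    print('problem:', problem)
```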
