comparison env/lib/python3.7/site-packages/pip/_internal/cache.py @ 2:6af9afd405e9 draft

"planemo upload commit 0a63dd5f4d38a1f6944587f52a8cd79874177fc1"
author shellac
date Thu, 14 May 2020 14:56:58 -0400
parents 26e78fe6e8c4
children
comparison
equal deleted inserted replaced
1:75ca89e9b81c 2:6af9afd405e9
1 """Cache Management
2 """
3
4 # The following comment should be removed at some point in the future.
5 # mypy: strict-optional=False
6
7 import hashlib
8 import json
9 import logging
10 import os
11
12 from pip._vendor.packaging.tags import interpreter_name, interpreter_version
13 from pip._vendor.packaging.utils import canonicalize_name
14
15 from pip._internal.exceptions import InvalidWheelFilename
16 from pip._internal.models.link import Link
17 from pip._internal.models.wheel import Wheel
18 from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
19 from pip._internal.utils.typing import MYPY_CHECK_RUNNING
20 from pip._internal.utils.urls import path_to_url
21
22 if MYPY_CHECK_RUNNING:
23 from typing import Optional, Set, List, Any, Dict
24
25 from pip._vendor.packaging.tags import Tag
26
27 from pip._internal.models.format_control import FormatControl
28
29 logger = logging.getLogger(__name__)
30
31
def _hash_dict(d):
    # type: (Dict[str, str]) -> str
    """Return a stable sha224 of a dictionary.

    The dictionary is serialised to compact JSON with sorted keys and
    ASCII-only escapes before hashing, so two dictionaries holding the same
    items produce the same digest regardless of insertion order.

    :param d: The dictionary to hash.
    :return: The hexadecimal sha224 digest of the serialised dictionary.
    """
    s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
    # ensure_ascii=True guarantees the text can be encoded as plain ASCII.
    return hashlib.sha224(s.encode("ascii")).hexdigest()
37
38
class Cache(object):
    """An abstract class - provides cache directories for data from links

    Subclasses must implement ``get_path_for_link``,
    ``get_path_for_link_legacy`` and ``get``.

    :param cache_dir: The root of the cache. A falsy value disables the
        cache; otherwise the path must be absolute.
    :param format_control: An object of FormatControl class to limit
        binaries being read from the cache.
    :param allowed_formats: which formats of files the cache should store.
        ('binary' and 'source' are the only allowed values)
    """

    def __init__(self, cache_dir, format_control, allowed_formats):
        # type: (str, FormatControl, Set[str]) -> None
        super(Cache, self).__init__()
        assert not cache_dir or os.path.isabs(cache_dir)
        # Normalise every falsy value (e.g. "") to None.
        self.cache_dir = cache_dir or None
        self.format_control = format_control
        self.allowed_formats = allowed_formats

        _valid_formats = {"source", "binary"}
        assert self.allowed_formats.issubset(_valid_formats)

    def _get_cache_path_parts_legacy(self, link):
        # type: (Link) -> List[str]
        """Get parts of path that must be os.path.joined with cache_dir

        Legacy cache key (pip < 20) for compatibility with older caches.

        :param link: The link to derive the cache key from.
        :return: Four path components sliced from the sha224 hex digest of
            the key.
        """

        # We want to generate an url to use as our cache key, we don't want to
        # just re-use the URL because it might have other items in the fragment
        # and we don't care about those.
        key_parts = [link.url_without_fragment]
        if link.hash_name is not None and link.hash is not None:
            key_parts.append("=".join([link.hash_name, link.hash]))
        key_url = "#".join(key_parts)

        # Encode our key url with sha224. Its output is shorter than
        # sha256's, which keeps paths short; the reduced strength makes no
        # practical difference for a cache key.
        hashed = hashlib.sha224(key_url.encode()).hexdigest()

        # We want to nest the directories some to prevent having a ton of top
        # level directories where we might run out of sub directories on some
        # FS.
        parts = [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]]

        return parts

    def _get_cache_path_parts(self, link):
        # type: (Link) -> List[str]
        """Get parts of path that must be os.path.joined with cache_dir

        :param link: The link to derive the cache key from.
        :return: Four path components sliced from the hex digest of the key.
        """

        # We want to generate an url to use as our cache key, we don't want to
        # just re-use the URL because it might have other items in the fragment
        # and we don't care about those.
        key_parts = {"url": link.url_without_fragment}
        if link.hash_name is not None and link.hash is not None:
            key_parts[link.hash_name] = link.hash
        if link.subdirectory_fragment:
            key_parts["subdirectory"] = link.subdirectory_fragment

        # Include interpreter name, major and minor version in cache key
        # to cope with ill-behaved sdists that build a different wheel
        # depending on the python version their setup.py is being run on,
        # and don't encode the difference in compatibility tags.
        # https://github.com/pypa/pip/issues/7296
        key_parts["interpreter_name"] = interpreter_name()
        key_parts["interpreter_version"] = interpreter_version()

        # Hash the key parts with the module's stable dictionary hash.
        hashed = _hash_dict(key_parts)

        # We want to nest the directories some to prevent having a ton of top
        # level directories where we might run out of sub directories on some
        # FS.
        parts = [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]]

        return parts

    def _get_candidates(self, link, canonical_package_name):
        # type: (Link, Optional[str]) -> List[Any]
        """List the entries cached for link.

        :param link: The link whose cache directories are inspected.
        :param canonical_package_name: The canonical name used to query the
            format control.
        :return: ``(entry_name, directory)`` tuples, entries from the current
            cache path first and entries from the legacy path after them.
            Empty when the cache is disabled, an argument is missing, or
            none of the permitted formats is stored by this cache.
        """
        can_not_cache = (
            not self.cache_dir or
            not canonical_package_name or
            not link
        )
        if can_not_cache:
            return []

        formats = self.format_control.get_allowed_formats(
            canonical_package_name
        )
        if not self.allowed_formats.intersection(formats):
            return []

        candidates = []
        path = self.get_path_for_link(link)
        if os.path.isdir(path):
            candidates.extend(
                (candidate, path) for candidate in os.listdir(path)
            )
        # TODO remove legacy path lookup in pip>=21
        legacy_path = self.get_path_for_link_legacy(link)
        if os.path.isdir(legacy_path):
            candidates.extend(
                (candidate, legacy_path)
                for candidate in os.listdir(legacy_path)
            )
        return candidates

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the legacy cache directory for link.

        Must be implemented by subclasses.
        """
        raise NotImplementedError()

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return a directory to store cached items in for link.

        Must be implemented by subclasses.
        """
        raise NotImplementedError()

    def get(
        self,
        link,            # type: Link
        package_name,    # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Returns a link to a cached item if it exists, otherwise returns the
        passed link.

        Must be implemented by subclasses.
        """
        raise NotImplementedError()
173
174
class SimpleWheelCache(Cache):
    """A cache of wheels for future installs.

    Wheels are stored under the ``wheels`` directory of ``cache_dir``.
    """

    def __init__(self, cache_dir, format_control):
        # type: (str, FormatControl) -> None
        super(SimpleWheelCache, self).__init__(
            cache_dir, format_control, {"binary"}
        )

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the directory built from the legacy cache key for link."""
        parts = self._get_cache_path_parts_legacy(link)
        return os.path.join(self.cache_dir, "wheels", *parts)

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return a directory to store cached wheels for link

        Because there are M wheels for any one sdist, we provide a directory
        to cache them in, and then consult that directory when looking up
        cache hits.

        We only insert things into the cache if they have plausible version
        numbers, so that we don't contaminate the cache with things that were
        not unique. E.g. ./package might have dozens of installs done for it
        and build a version of 0.0...and if we built and cached a wheel, we'd
        end up using the same wheel even if the source has been edited.

        :param link: The link of the sdist for which this will cache wheels.
        """
        parts = self._get_cache_path_parts(link)

        # Store wheels within the root cache_dir
        return os.path.join(self.cache_dir, "wheels", *parts)

    def get(
        self,
        link,            # type: Link
        package_name,    # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Return a link to the best cached wheel for link, or link itself.

        Cached files are skipped when their name is not a valid wheel
        filename, when the wheel belongs to a different distribution, or
        when the wheel is not supported by ``supported_tags``. The remaining
        wheel with the lowest ``support_index_min`` is chosen.

        :param link: The link to look up in the cache.
        :param package_name: The expected distribution name; when falsy the
            passed link is returned unchanged.
        :param supported_tags: The tags a wheel is checked against.
        :return: A file link to the chosen cached wheel, or the passed link
            when nothing suitable is cached.
        """
        candidates = []

        if not package_name:
            return link

        canonical_package_name = canonicalize_name(package_name)
        for wheel_name, wheel_dir in self._get_candidates(
            link, canonical_package_name
        ):
            try:
                wheel = Wheel(wheel_name)
            except InvalidWheelFilename:
                continue
            if canonicalize_name(wheel.name) != canonical_package_name:
                logger.debug(
                    "Ignoring cached wheel {} for {} as it "
                    "does not match the expected distribution name {}.".format(
                        wheel_name, link, package_name
                    )
                )
                continue
            if not wheel.supported(supported_tags):
                # Built for a different python/arch/etc
                continue
            candidates.append(
                (
                    wheel.support_index_min(supported_tags),
                    wheel_name,
                    wheel_dir,
                )
            )

        if not candidates:
            return link

        # Tuples order by support index first, so min() picks the wheel
        # with the lowest index.
        _, wheel_name, wheel_dir = min(candidates)
        return Link(path_to_url(os.path.join(wheel_dir, wheel_name)))
255
256
class EphemWheelCache(SimpleWheelCache):
    """A SimpleWheelCache that creates its own temporary cache directory
    """

    def __init__(self, format_control):
        # type: (FormatControl) -> None
        # The temporary directory must exist before the base initialiser
        # runs, because its path is passed on as the cache root.
        self._temp_dir = TempDirectory(
            kind=tempdir_kinds.EPHEM_WHEEL_CACHE,
            globally_managed=True,
        )

        super(EphemWheelCache, self).__init__(
            self._temp_dir.path, format_control
        )
271
272
class CacheEntry(object):
    """The result of a cache lookup.

    :param link: The link to the cached item.
    :param persistent: Flag stored alongside the link, recording which
        cache the item was found in.
    """

    def __init__(
        self,
        link,        # type: Link
        persistent,  # type: bool
    ):
        # type: (...) -> None
        self.link = link
        self.persistent = persistent
281
282
class WheelCache(Cache):
    """Wraps EphemWheelCache and SimpleWheelCache into a single Cache

    This Cache allows for graceful degradation: the simple (persistent)
    wheel cache is consulted first, and the ephem wheel cache is used when
    a link is not found there.
    """

    def __init__(self, cache_dir, format_control):
        # type: (str, FormatControl) -> None
        super(WheelCache, self).__init__(
            cache_dir, format_control, {'binary'}
        )
        self._wheel_cache = SimpleWheelCache(cache_dir, format_control)
        self._ephem_cache = EphemWheelCache(format_control)

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the legacy directory for link in the persistent cache."""
        return self._wheel_cache.get_path_for_link_legacy(link)

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return the directory for link in the persistent cache."""
        return self._wheel_cache.get_path_for_link(link)

    def get_ephem_path_for_link(self, link):
        # type: (Link) -> str
        """Return the directory for link in the ephemeral cache."""
        return self._ephem_cache.get_path_for_link(link)

    def get(
        self,
        link,            # type: Link
        package_name,    # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Return a link to a cached item if it exists, otherwise return the
        passed link.
        """
        cache_entry = self.get_cache_entry(link, package_name, supported_tags)
        if cache_entry is None:
            return link
        return cache_entry.link

    def get_cache_entry(
        self,
        link,            # type: Link
        package_name,    # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Optional[CacheEntry]
        """Returns a CacheEntry with a link to a cached item if it exists or
        None. The cache entry indicates if the item was found in the persistent
        or ephemeral cache.
        """
        retval = self._wheel_cache.get(
            link=link,
            package_name=package_name,
            supported_tags=supported_tags,
        )
        # The wrapped caches hand back the very same link object on a miss,
        # so an identity check is what detects a hit.
        if retval is not link:
            return CacheEntry(retval, persistent=True)

        retval = self._ephem_cache.get(
            link=link,
            package_name=package_name,
            supported_tags=supported_tags,
        )
        if retval is not link:
            return CacheEntry(retval, persistent=False)

        return None