comparison lib/python3.8/site-packages/pip/_internal/cache.py @ 0:9e54283cc701 draft

"planemo upload commit d12c32a45bcd441307e632fca6d9af7d60289d44"
author guerler
date Mon, 27 Jul 2020 03:47:31 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9e54283cc701
1 """Cache Management
2 """
3
4 # The following comment should be removed at some point in the future.
5 # mypy: strict-optional=False
6
7 import hashlib
8 import json
9 import logging
10 import os
11
12 from pip._vendor.packaging.tags import interpreter_name, interpreter_version
13 from pip._vendor.packaging.utils import canonicalize_name
14
15 from pip._internal.exceptions import InvalidWheelFilename
16 from pip._internal.models.link import Link
17 from pip._internal.models.wheel import Wheel
18 from pip._internal.utils.temp_dir import TempDirectory
19 from pip._internal.utils.typing import MYPY_CHECK_RUNNING
20 from pip._internal.utils.urls import path_to_url
21
22 if MYPY_CHECK_RUNNING:
23 from typing import Optional, Set, List, Any, Dict
24
25 from pip._vendor.packaging.tags import Tag
26
27 from pip._internal.models.format_control import FormatControl
28
29 logger = logging.getLogger(__name__)
30
31
32 def _hash_dict(d):
33 # type: (Dict[str, str]) -> str
34 """Return a stable sha224 of a dictionary."""
35 s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
36 return hashlib.sha224(s.encode("ascii")).hexdigest()
37
38
class Cache(object):
    """An abstract class - provides cache directories for data from links


    :param cache_dir: The root of the cache. Must be an absolute path when
        given; an empty value is stored as ``None``.
    :param format_control: An object of FormatControl class to limit
        binaries being read from the cache.
    :param allowed_formats: which formats of files the cache should store.
        ('binary' and 'source' are the only allowed values)
    """

    def __init__(self, cache_dir, format_control, allowed_formats):
        # type: (str, FormatControl, Set[str]) -> None
        super(Cache, self).__init__()
        assert not cache_dir or os.path.isabs(cache_dir)
        # Normalize any falsy value (e.g. "") to None.
        self.cache_dir = cache_dir or None
        self.format_control = format_control
        self.allowed_formats = allowed_formats

        # allowed_formats must not contain anything besides these two values.
        _valid_formats = {"source", "binary"}
        assert self.allowed_formats.union(_valid_formats) == _valid_formats

    def _get_cache_path_parts_legacy(self, link):
        # type: (Link) -> List[str]
        """Get parts of the path that must be os.path.joined with cache_dir

        Legacy cache key (pip < 20) for compatibility with older caches.
        """

        # We want to generate an url to use as our cache key, we don't want to
        # just re-use the URL because it might have other items in the fragment
        # and we don't care about those.
        key_parts = [link.url_without_fragment]
        if link.hash_name is not None and link.hash is not None:
            key_parts.append("=".join([link.hash_name, link.hash]))
        key_url = "#".join(key_parts)

        # Encode our key url with sha224, we'll use this because it has similar
        # security properties to sha256, but with a shorter total output (and
        # thus less secure). However the differences don't make a lot of
        # difference for our use case here.
        hashed = hashlib.sha224(key_url.encode()).hexdigest()

        # We want to nest the directories some to prevent having a ton of top
        # level directories where we might run out of sub directories on some
        # FS.
        parts = [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]]

        return parts

    def _get_cache_path_parts(self, link):
        # type: (Link) -> List[str]
        """Get parts of the path that must be os.path.joined with cache_dir
        """

        # We want to generate a cache key from the url, we don't want to
        # just re-use the URL because it might have other items in the fragment
        # and we don't care about those.
        key_parts = {"url": link.url_without_fragment}
        if link.hash_name is not None and link.hash is not None:
            key_parts[link.hash_name] = link.hash
        if link.subdirectory_fragment:
            key_parts["subdirectory"] = link.subdirectory_fragment

        # Include interpreter name, major and minor version in cache key
        # to cope with ill-behaved sdists that build a different wheel
        # depending on the python version their setup.py is being run on,
        # and don't encode the difference in compatibility tags.
        # https://github.com/pypa/pip/issues/7296
        key_parts["interpreter_name"] = interpreter_name()
        key_parts["interpreter_version"] = interpreter_version()

        # Hash our key parts with sha224, we'll use this because it has
        # similar security properties to sha256, but with a shorter total
        # output (and thus less secure). However the differences don't make a
        # lot of difference for our use case here.
        hashed = _hash_dict(key_parts)

        # We want to nest the directories some to prevent having a ton of top
        # level directories where we might run out of sub directories on some
        # FS.
        parts = [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]]

        return parts

    def _get_candidates(self, link, canonical_package_name):
        # type: (Link, Optional[str]) -> List[Any]
        """Return ``(entry_name, directory)`` pairs found in the cache for link.

        Entries from the current cache directory come first, followed by
        entries from the legacy cache directory. An empty list is returned
        when there is no cache_dir, package name or link, or when none of
        this cache's allowed formats is permitted for the package by
        format_control.
        """
        can_not_cache = (
            not self.cache_dir or
            not canonical_package_name or
            not link
        )
        if can_not_cache:
            return []

        formats = self.format_control.get_allowed_formats(
            canonical_package_name
        )
        if not self.allowed_formats.intersection(formats):
            return []

        candidates = []
        path = self.get_path_for_link(link)
        if os.path.isdir(path):
            candidates.extend((name, path) for name in os.listdir(path))
        # TODO remove legacy path lookup in pip>=21
        legacy_path = self.get_path_for_link_legacy(link)
        if os.path.isdir(legacy_path):
            candidates.extend(
                (name, legacy_path) for name in os.listdir(legacy_path)
            )
        return candidates

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the legacy-layout cache directory for link.

        Must be implemented by subclasses.
        """
        raise NotImplementedError()

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return a directory to store cached items in for link.

        Must be implemented by subclasses.
        """
        raise NotImplementedError()

    def get(
        self,
        link,            # type: Link
        package_name,    # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Returns a link to a cached item if it exists, otherwise returns the
        passed link.

        Must be implemented by subclasses.
        """
        raise NotImplementedError()

    def cleanup(self):
        # type: () -> None
        """Release any resources held by the cache; a no-op by default."""
        pass
177
178
class SimpleWheelCache(Cache):
    """A cache of wheels for future installs.
    """

    def __init__(self, cache_dir, format_control):
        # type: (str, FormatControl) -> None
        super(SimpleWheelCache, self).__init__(
            cache_dir, format_control, {"binary"}
        )

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the legacy-layout directory of cached wheels for link."""
        parts = self._get_cache_path_parts_legacy(link)
        return os.path.join(self.cache_dir, "wheels", *parts)

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return a directory to store cached wheels for link

        Because there are M wheels for any one sdist, we provide a directory
        to cache them in, and then consult that directory when looking up
        cache hits.

        We only insert things into the cache if they have plausible version
        numbers, so that we don't contaminate the cache with things that were
        not unique. E.g. ./package might have dozens of installs done for it
        and build a version of 0.0...and if we built and cached a wheel, we'd
        end up using the same wheel even if the source has been edited.

        :param link: The link of the sdist for which this will cache wheels.
        """
        parts = self._get_cache_path_parts(link)

        # Store wheels within the root cache_dir
        return os.path.join(self.cache_dir, "wheels", *parts)

    def get(
        self,
        link,            # type: Link
        package_name,    # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Return a link to the best matching cached wheel, else ``link``.

        A cached file is considered only if its name parses as a wheel
        filename, its distribution name canonicalizes to that of
        package_name, and it supports one of supported_tags. Among those,
        the one with the smallest ``support_index_min`` is chosen. The
        passed link is returned unchanged when package_name is empty or no
        cached wheel qualifies.
        """
        candidates = []

        if not package_name:
            return link

        canonical_package_name = canonicalize_name(package_name)
        for wheel_name, wheel_dir in self._get_candidates(
            link, canonical_package_name
        ):
            try:
                wheel = Wheel(wheel_name)
            except InvalidWheelFilename:
                # Not a wheel file name; skip it.
                continue
            if canonicalize_name(wheel.name) != canonical_package_name:
                # Pass the values as logging args so the message is only
                # formatted when debug logging is actually enabled.
                logger.debug(
                    "Ignoring cached wheel %s for %s as it "
                    "does not match the expected distribution name %s.",
                    wheel_name, link, package_name,
                )
                continue
            if not wheel.supported(supported_tags):
                # Built for a different python/arch/etc
                continue
            candidates.append(
                (
                    wheel.support_index_min(supported_tags),
                    wheel_name,
                    wheel_dir,
                )
            )

        if not candidates:
            return link

        # Tuples compare by support index first, so min() picks the wheel
        # with the lowest index.
        _, wheel_name, wheel_dir = min(candidates)
        return Link(path_to_url(os.path.join(wheel_dir, wheel_name)))
259
260
class EphemWheelCache(SimpleWheelCache):
    """A SimpleWheelCache that creates its own temporary cache directory
    """

    def __init__(self, format_control):
        # type: (FormatControl) -> None
        # The temporary directory serves as this cache's root and is removed
        # again by cleanup().
        self._temp_dir = TempDirectory(kind="ephem-wheel-cache")

        super(EphemWheelCache, self).__init__(
            self._temp_dir.path, format_control
        )

    def cleanup(self):
        # type: () -> None
        """Clean up the temporary directory backing this cache."""
        self._temp_dir.cleanup()
276
277
class WheelCache(Cache):
    """Wraps EphemWheelCache and SimpleWheelCache into a single Cache

    This Cache allows for graceful degradation, using the ephem wheel cache
    when a certain link is not found in the simple wheel cache first.
    """

    def __init__(self, cache_dir, format_control):
        # type: (str, FormatControl) -> None
        super(WheelCache, self).__init__(
            cache_dir, format_control, {"binary"}
        )
        self._wheel_cache = SimpleWheelCache(cache_dir, format_control)
        self._ephem_cache = EphemWheelCache(format_control)

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the simple wheel cache's legacy directory for link."""
        return self._wheel_cache.get_path_for_link_legacy(link)

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return the simple wheel cache's directory for link."""
        return self._wheel_cache.get_path_for_link(link)

    def get_ephem_path_for_link(self, link):
        # type: (Link) -> str
        """Return the ephemeral wheel cache's directory for link."""
        return self._ephem_cache.get_path_for_link(link)

    def get(
        self,
        link,            # type: Link
        package_name,    # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Look up link in the simple wheel cache, then in the ephem cache.

        Returns the passed link itself when neither cache has a match.
        """
        retval = self._wheel_cache.get(
            link=link,
            package_name=package_name,
            supported_tags=supported_tags,
        )
        # The wrapped cache hands back the very same object on a miss, so an
        # identity check distinguishes a hit from a miss.
        if retval is not link:
            return retval

        return self._ephem_cache.get(
            link=link,
            package_name=package_name,
            supported_tags=supported_tags,
        )

    def cleanup(self):
        # type: () -> None
        """Clean up both wrapped caches.

        The ephemeral cache is cleaned up even if cleaning up the simple
        wheel cache raises, so its temporary directory is not left behind.
        """
        try:
            self._wheel_cache.cleanup()
        finally:
            self._ephem_cache.cleanup()