Mercurial > repos > guerler > hhblits
comparison lib/python3.8/site-packages/pip/_internal/cache.py @ 0:9e54283cc701 draft
"planemo upload commit d12c32a45bcd441307e632fca6d9af7d60289d44"
author | guerler |
---|---|
date | Mon, 27 Jul 2020 03:47:31 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9e54283cc701 |
---|---|
1 """Cache Management | |
2 """ | |
3 | |
4 # The following comment should be removed at some point in the future. | |
5 # mypy: strict-optional=False | |
6 | |
7 import hashlib | |
8 import json | |
9 import logging | |
10 import os | |
11 | |
12 from pip._vendor.packaging.tags import interpreter_name, interpreter_version | |
13 from pip._vendor.packaging.utils import canonicalize_name | |
14 | |
15 from pip._internal.exceptions import InvalidWheelFilename | |
16 from pip._internal.models.link import Link | |
17 from pip._internal.models.wheel import Wheel | |
18 from pip._internal.utils.temp_dir import TempDirectory | |
19 from pip._internal.utils.typing import MYPY_CHECK_RUNNING | |
20 from pip._internal.utils.urls import path_to_url | |
21 | |
22 if MYPY_CHECK_RUNNING: | |
23 from typing import Optional, Set, List, Any, Dict | |
24 | |
25 from pip._vendor.packaging.tags import Tag | |
26 | |
27 from pip._internal.models.format_control import FormatControl | |
28 | |
29 logger = logging.getLogger(__name__) | |
30 | |
31 | |
32 def _hash_dict(d): | |
33 # type: (Dict[str, str]) -> str | |
34 """Return a stable sha224 of a dictionary.""" | |
35 s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True) | |
36 return hashlib.sha224(s.encode("ascii")).hexdigest() | |
37 | |
38 | |
class Cache(object):
    """Abstract base class that provides cache directories for link data.

    :param cache_dir: The root of the cache.
    :param format_control: An object of FormatControl class to limit
        binaries being read from the cache.
    :param allowed_formats: which formats of files the cache should store.
        ('binary' and 'source' are the only allowed values)
    """

    def __init__(self, cache_dir, format_control, allowed_formats):
        # type: (str, FormatControl, Set[str]) -> None
        super(Cache, self).__init__()
        assert not cache_dir or os.path.isabs(cache_dir)
        self.cache_dir = cache_dir or None
        self.format_control = format_control
        self.allowed_formats = allowed_formats

        legal_formats = {"source", "binary"}
        assert self.allowed_formats.union(legal_formats) == legal_formats

    def _get_cache_path_parts_legacy(self, link):
        # type: (Link) -> List[str]
        """Return the nested path components (pip < 20 scheme) for *link*.

        Kept for compatibility so wheels cached by older pip versions can
        still be located.
        """
        # The cache key is the de-fragmented URL with only the hash
        # fragment re-attached; other fragment items are irrelevant here.
        if link.hash_name is not None and link.hash is not None:
            key_url = "{}#{}={}".format(
                link.url_without_fragment, link.hash_name, link.hash
            )
        else:
            key_url = link.url_without_fragment

        # sha224 has security properties similar to sha256 but a shorter
        # output — more than adequate for a cache key.
        digest = hashlib.sha224(key_url.encode()).hexdigest()

        # Nest directories to avoid an enormous flat listing, which can
        # exhaust sub-directory limits on some filesystems.
        return [digest[:2], digest[2:4], digest[4:6], digest[6:]]

    def _get_cache_path_parts(self, link):
        # type: (Link) -> List[str]
        """Return the nested path components under cache_dir for *link*."""
        # Build the cache key from the de-fragmented URL plus only the
        # fragment items we actually care about (hash, subdirectory).
        key_fields = {"url": link.url_without_fragment}
        if link.hash_name is not None and link.hash is not None:
            key_fields[link.hash_name] = link.hash
        if link.subdirectory_fragment:
            key_fields["subdirectory"] = link.subdirectory_fragment

        # Include interpreter name and version so ill-behaved sdists that
        # build a different wheel per Python version (without encoding the
        # difference in their compatibility tags) don't collide.
        # https://github.com/pypa/pip/issues/7296
        key_fields["interpreter_name"] = interpreter_name()
        key_fields["interpreter_version"] = interpreter_version()

        # sha224 (via _hash_dict) keeps the key short while staying
        # collision-resistant enough for cache purposes.
        digest = _hash_dict(key_fields)

        # Nest directories to avoid an enormous flat listing, which can
        # exhaust sub-directory limits on some filesystems.
        return [digest[:2], digest[2:4], digest[4:6], digest[6:]]

    def _get_candidates(self, link, canonical_package_name):
        # type: (Link, Optional[str]) -> List[Any]
        """Return (filename, directory) pairs cached for *link*."""
        if not (self.cache_dir and canonical_package_name and link):
            return []

        allowed = self.format_control.get_allowed_formats(
            canonical_package_name
        )
        if not self.allowed_formats.intersection(allowed):
            return []

        found = []
        directory = self.get_path_for_link(link)
        if os.path.isdir(directory):
            found.extend(
                (name, directory) for name in os.listdir(directory)
            )
        # TODO remove legacy path lookup in pip>=21
        old_directory = self.get_path_for_link_legacy(link)
        if os.path.isdir(old_directory):
            found.extend(
                (name, old_directory) for name in os.listdir(old_directory)
            )
        return found

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the pre-pip-20 cache directory for *link* (abstract)."""
        raise NotImplementedError()

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return a directory to store cached items in for link (abstract)."""
        raise NotImplementedError()

    def get(
        self,
        link,  # type: Link
        package_name,  # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Return a link to a cached item if one exists, else the passed
        link (abstract).
        """
        raise NotImplementedError()

    def cleanup(self):
        # type: () -> None
        # Subclasses with temporary state override this; default is a no-op.
        pass
177 | |
178 | |
class SimpleWheelCache(Cache):
    """A persistent cache of locally built wheels for future installs."""

    def __init__(self, cache_dir, format_control):
        # type: (str, FormatControl) -> None
        super(SimpleWheelCache, self).__init__(
            cache_dir, format_control, {"binary"}
        )

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the pre-pip-20 cache directory for *link*."""
        legacy_parts = self._get_cache_path_parts_legacy(link)
        return os.path.join(self.cache_dir, "wheels", *legacy_parts)

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return a directory to store cached wheels for link.

        Because there are M wheels for any one sdist, we provide a
        directory to cache them in, and then consult that directory when
        looking up cache hits.

        We only insert things into the cache if they have plausible
        version numbers, so that we don't contaminate the cache with
        things that were not unique. E.g. ./package might have dozens of
        installs done for it and build a version of 0.0...and if we built
        and cached a wheel, we'd end up using the same wheel even if the
        source has been edited.

        :param link: The link of the sdist for which this will cache wheels.
        """
        # Wheels are stored under the root cache_dir.
        return os.path.join(
            self.cache_dir, "wheels", *self._get_cache_path_parts(link)
        )

    def get(
        self,
        link,  # type: Link
        package_name,  # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Return a link to the most-preferred cached wheel for *link*,
        or *link* itself when nothing usable is cached.
        """
        if not package_name:
            return link

        canonical_name = canonicalize_name(package_name)
        usable = []
        for filename, directory in self._get_candidates(link, canonical_name):
            try:
                wheel = Wheel(filename)
            except InvalidWheelFilename:
                # Not a wheel file at all; skip it.
                continue
            if canonicalize_name(wheel.name) != canonical_name:
                logger.debug(
                    "Ignoring cached wheel {} for {} as it "
                    "does not match the expected distribution name {}.".format(
                        filename, link, package_name
                    )
                )
                continue
            if not wheel.supported(supported_tags):
                # Built for a different python/arch/etc
                continue
            usable.append(
                (
                    wheel.support_index_min(supported_tags),
                    filename,
                    directory,
                )
            )

        if not usable:
            return link

        # Smallest support index == most preferred tag.
        _, best_name, best_dir = min(usable)
        return Link(path_to_url(os.path.join(best_dir, best_name)))
259 | |
260 | |
class EphemWheelCache(SimpleWheelCache):
    """A SimpleWheelCache backed by its own self-created temporary
    directory, discarded on cleanup.
    """

    def __init__(self, format_control):
        # type: (FormatControl) -> None
        # The temp dir lives only as long as this cache object is used.
        self._temp_dir = TempDirectory(kind="ephem-wheel-cache")
        super(EphemWheelCache, self).__init__(
            self._temp_dir.path, format_control
        )

    def cleanup(self):
        # type: () -> None
        """Remove the temporary cache directory."""
        self._temp_dir.cleanup()
276 | |
277 | |
class WheelCache(Cache):
    """Wraps an EphemWheelCache and a SimpleWheelCache into a single Cache.

    This allows graceful degradation: the ephemeral wheel cache is
    consulted whenever a link is not found in the simple (persistent)
    wheel cache first.
    """

    def __init__(self, cache_dir, format_control):
        # type: (str, FormatControl) -> None
        super(WheelCache, self).__init__(
            cache_dir, format_control, {'binary'}
        )
        self._wheel_cache = SimpleWheelCache(cache_dir, format_control)
        self._ephem_cache = EphemWheelCache(format_control)

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Delegate to the persistent cache's legacy path scheme."""
        return self._wheel_cache.get_path_for_link_legacy(link)

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Delegate to the persistent cache's path scheme."""
        return self._wheel_cache.get_path_for_link(link)

    def get_ephem_path_for_link(self, link):
        # type: (Link) -> str
        """Return the ephemeral cache directory for *link*."""
        return self._ephem_cache.get_path_for_link(link)

    def get(
        self,
        link,  # type: Link
        package_name,  # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Look *link* up in the persistent cache first, then fall back
        to the ephemeral cache; return *link* itself on a total miss.
        """
        persistent_hit = self._wheel_cache.get(
            link=link,
            package_name=package_name,
            supported_tags=supported_tags,
        )
        # Identity check: a cache hit returns a *different* Link object.
        if persistent_hit is not link:
            return persistent_hit

        return self._ephem_cache.get(
            link=link,
            package_name=package_name,
            supported_tags=supported_tags,
        )

    def cleanup(self):
        # type: () -> None
        """Clean up both underlying caches."""
        self._wheel_cache.cleanup()
        self._ephem_cache.cleanup()