comparison lib/python3.8/site-packages/pip/_vendor/cachecontrol/controller.py @ 0:9e54283cc701 draft

"planemo upload commit d12c32a45bcd441307e632fca6d9af7d60289d44"
author guerler
date Mon, 27 Jul 2020 03:47:31 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9e54283cc701
1 """
2 The httplib2 algorithms ported for use with requests.
3 """
4 import logging
5 import re
6 import calendar
7 import time
8 from email.utils import parsedate_tz
9
10 from pip._vendor.requests.structures import CaseInsensitiveDict
11
12 from .cache import DictCache
13 from .serialize import Serializer
14
15
logger = logging.getLogger(__name__)

# Generic URI splitter from RFC 3986, Appendix B.  Every component except the
# path is optional, so the pattern matches any string (including "").
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Split *uri* into its five RFC 3986 components.

    Returns a ``(scheme, authority, path, query, fragment)`` tuple.  A
    component that is absent from *uri* is ``None``; the path is always a
    string, possibly empty, because its group is not optional in the regex.
    """
    matched = URI.match(uri)
    # Groups 2, 4, 5, 7 and 9 are the bare components; the odd-numbered
    # wrappers around them also capture the ":", "//", "?" and "#" delimiters.
    return matched.group(2, 4, 5, 7, 9)
28
29
class CacheController(object):
    """Decide whether a request can be answered from, or stored in, a cache.

    The controller combines three collaborators:

    * ``cache`` -- a store exposing ``get(key)``, ``set(key, value)`` and
      ``delete(key)``;
    * ``serializer`` -- an object exposing ``dumps(request, response, body=...)``
      and ``loads(request, data)``;
    * the HTTP caching headers of the request and response.
    """

    def __init__(
        self, cache=None, cache_etags=True, serializer=None, status_codes=None
    ):
        """Create a controller.

        :param cache: cache backend; a fresh ``DictCache`` is used when None.
        :param cache_etags: when true, any response carrying an ETag is stored.
        :param serializer: response serializer; defaults to ``Serializer()``.
        :param status_codes: iterable of cacheable status codes; defaults to
            ``(200, 203, 300, 301)`` when falsy.
        """
        self.cache = DictCache() if cache is None else cache
        self.cache_etags = cache_etags
        self.serializer = serializer or Serializer()
        self.cacheable_status_codes = status_codes or (200, 203, 300, 301)

    @classmethod
    def _urlnorm(cls, uri):
        """Normalize the URL to create a safe key for the cache.

        The scheme and authority are lower-cased, an empty path becomes
        ``"/"`` and the fragment is dropped.

        :raises Exception: if ``uri`` lacks a scheme or an authority.
        """
        (scheme, authority, path, query, _fragment) = parse_uri(uri)
        if not scheme or not authority:
            raise Exception("Only absolute URIs are allowed. uri = %s" % uri)

        scheme = scheme.lower()
        authority = authority.lower()

        if not path:
            path = "/"

        # Could do syntax based normalization of the URI before
        # computing the digest. See Section 6.2.2 of Std 66.
        request_uri = "?".join([path, query]) if query else path
        return scheme + "://" + authority + request_uri

    @classmethod
    def cache_url(cls, uri):
        """Return the cache key for ``uri`` (see :meth:`_urlnorm`)."""
        return cls._urlnorm(uri)

    def parse_cache_control(self, headers):
        """Parse the Cache-Control header found in ``headers``.

        :param headers: mapping with a ``get`` method; the header is looked up
            as ``cache-control`` and then ``Cache-Control``.
        :returns: dict mapping each recognised directive name (lower-cased) to
            its integer value, or to ``None`` for valueless directives.
            Unknown directives, and directives whose mandatory integer value
            is missing or invalid, are left out.
        """
        # name -> (value type or None, whether the value is mandatory)
        known_directives = {
            # https://tools.ietf.org/html/rfc7234#section-5.2
            "max-age": (int, True),
            "max-stale": (int, False),
            "min-fresh": (int, True),
            "no-cache": (None, False),
            "no-store": (None, False),
            "no-transform": (None, False),
            "only-if-cached": (None, False),
            "must-revalidate": (None, False),
            "public": (None, False),
            "private": (None, False),
            "proxy-revalidate": (None, False),
            "s-maxage": (int, True),
        }

        cc_headers = headers.get("cache-control", headers.get("Cache-Control", ""))

        retval = {}

        for cc_directive in cc_headers.split(","):
            if not cc_directive.strip():
                continue

            parts = cc_directive.split("=", 1)
            # RFC 7234 section 5.2: directive names compare case-insensitively,
            # so "Max-Age=60" must be treated exactly like "max-age=60".
            directive = parts[0].strip().lower()

            try:
                typ, required = known_directives[directive]
            except KeyError:
                logger.debug("Ignoring unknown cache-control directive: %s", directive)
                continue

            # Valueless directives, and those whose value is optional, are
            # recorded up front; a parsed value (below) overwrites the None.
            if not typ or not required:
                retval[directive] = None
            if typ:
                try:
                    retval[directive] = typ(parts[1].strip())
                except IndexError:
                    if required:
                        logger.debug(
                            "Missing value for cache-control " "directive: %s",
                            directive,
                        )
                except ValueError:
                    logger.debug(
                        "Invalid value for cache-control directive " "%s, must be %s",
                        directive,
                        typ.__name__,
                    )

        return retval

    def cached_request(self, request):
        """
        Return a cached response if it exists in the cache, otherwise
        return False.

        False is also returned when the request demands fresh data
        (``no-cache`` or ``max-age=0``), when the entry cannot be
        deserialized, when it has no usable Date header, or when it is stale.
        Entries that can never be reused (no date/etag, or stale without an
        etag) are deleted from the cache along the way.
        """
        cache_url = self.cache_url(request.url)
        logger.debug('Looking up "%s" in the cache', cache_url)
        cc = self.parse_cache_control(request.headers)

        # Bail out if the request insists on fresh data
        if "no-cache" in cc:
            logger.debug('Request header has "no-cache", cache bypassed')
            return False

        if "max-age" in cc and cc["max-age"] == 0:
            logger.debug('Request header has "max_age" as 0, cache bypassed')
            return False

        # Request allows serving from the cache, let's see if we find something
        cache_data = self.cache.get(cache_url)
        if cache_data is None:
            logger.debug("No cache entry available")
            return False

        # Check whether it can be deserialized
        resp = self.serializer.loads(request, cache_data)
        if not resp:
            logger.warning("Cache entry deserialization failed, entry ignored")
            return False

        # If we have a cached 301, return it immediately. We don't
        # need to test our response for other headers b/c it is
        # intrinsically "cacheable" as it is Permanent.
        # See:
        #   https://tools.ietf.org/html/rfc7231#section-6.4.2
        #
        # Client can try to refresh the value by repeating the request
        # with cache busting headers as usual (ie no-cache).
        if resp.status == 301:
            msg = (
                'Returning cached "301 Moved Permanently" response '
                "(ignoring date and etag information)"
            )
            logger.debug(msg)
            return resp

        headers = CaseInsensitiveDict(resp.headers)

        # parsedate_tz() returns None for an unparsable value; treat that the
        # same as a missing Date header instead of letting timegm(None) raise.
        date_tuple = parsedate_tz(headers["date"]) if "date" in headers else None
        if date_tuple is None:
            if "etag" not in headers:
                # Without date or etag, the cached response can never be used
                # and should be deleted.
                logger.debug("Purging cached response: no date or etag")
                self.cache.delete(cache_url)
            logger.debug("Ignoring cached response: no date")
            return False

        now = time.time()
        date = calendar.timegm(date_tuple)
        current_age = max(0, now - date)
        logger.debug("Current age based on date: %i", current_age)

        # TODO: There is an assumption that the result will be a
        #       urllib3 response object. This may not be best since we
        #       could probably avoid instantiating or constructing the
        #       response until we know we need it.
        resp_cc = self.parse_cache_control(headers)

        # determine freshness
        freshness_lifetime = 0

        # Check the max-age pragma in the cache control header
        if "max-age" in resp_cc:
            freshness_lifetime = resp_cc["max-age"]
            logger.debug("Freshness lifetime from max-age: %i", freshness_lifetime)

        # If there isn't a max-age, check for an expires header
        elif "expires" in headers:
            expires = parsedate_tz(headers["expires"])
            if expires is not None:
                expire_time = calendar.timegm(expires) - date
                freshness_lifetime = max(0, expire_time)
                logger.debug("Freshness lifetime from expires: %i", freshness_lifetime)

        # Determine if we are setting freshness limit in the
        # request. Note, this overrides what was in the response.
        if "max-age" in cc:
            freshness_lifetime = cc["max-age"]
            logger.debug(
                "Freshness lifetime from request max-age: %i", freshness_lifetime
            )

        if "min-fresh" in cc:
            min_fresh = cc["min-fresh"]
            # adjust our current age by our min fresh
            current_age += min_fresh
            logger.debug("Adjusted current age from min-fresh: %i", current_age)

        # Return entry if it is fresh enough
        if freshness_lifetime > current_age:
            logger.debug('The response is "fresh", returning cached response')
            logger.debug("%i > %i", freshness_lifetime, current_age)
            return resp

        # we're not fresh. If we don't have an Etag, clear it out
        if "etag" not in headers:
            logger.debug('The cached response is "stale" with no etag, purging')
            self.cache.delete(cache_url)

        # return the original handler
        return False

    def conditional_headers(self, request):
        """Return the validator headers to add to ``request``.

        When a cached response exists for the request URL, its ``ETag`` is
        returned as ``If-None-Match`` and its ``Last-Modified`` as
        ``If-Modified-Since``.  An empty dict is returned otherwise.
        """
        cache_url = self.cache_url(request.url)
        resp = self.serializer.loads(request, self.cache.get(cache_url))
        new_headers = {}

        if resp:
            headers = CaseInsensitiveDict(resp.headers)

            if "etag" in headers:
                new_headers["If-None-Match"] = headers["ETag"]

            if "last-modified" in headers:
                new_headers["If-Modified-Since"] = headers["Last-Modified"]

        return new_headers

    def cache_response(self, request, response, body=None, status_codes=None):
        """
        Algorithm for caching requests.

        Stores ``response`` in the cache when its status code and headers
        allow it; always returns None.

        :param request: object exposing ``url`` and ``headers``.
        :param response: object exposing ``status`` and ``headers``.
        :param body: already-read response body, if any; when a valid
            Content-Length header disagrees with ``len(body)`` nothing is
            cached.
        :param status_codes: overrides the controller's cacheable status codes
            for this call when truthy.
        """
        # From httplib2: Don't cache 206's since we aren't going to
        #                handle byte range requests
        cacheable_status_codes = status_codes or self.cacheable_status_codes
        if response.status not in cacheable_status_codes:
            logger.debug(
                "Status code %s not in %s", response.status, cacheable_status_codes
            )
            return

        response_headers = CaseInsensitiveDict(response.headers)

        # If we've been given a body, our response has a Content-Length, that
        # Content-Length is valid then we can check to see if the body we've
        # been given matches the expected size, and if it doesn't we'll just
        # skip trying to cache it.
        if (
            body is not None
            and "content-length" in response_headers
            and response_headers["content-length"].isdigit()
            and int(response_headers["content-length"]) != len(body)
        ):
            return

        cc_req = self.parse_cache_control(request.headers)
        cc = self.parse_cache_control(response_headers)

        cache_url = self.cache_url(request.url)
        logger.debug('Updating cache with response from "%s"', cache_url)

        # Delete it from the cache if we happen to have it stored there
        no_store = False
        if "no-store" in cc:
            no_store = True
            logger.debug('Response header has "no-store"')
        if "no-store" in cc_req:
            no_store = True
            logger.debug('Request header has "no-store"')
        if no_store and self.cache.get(cache_url):
            logger.debug('Purging existing cache entry to honor "no-store"')
            self.cache.delete(cache_url)
        if no_store:
            return

        # https://tools.ietf.org/html/rfc7234#section-4.1:
        # A Vary header field-value of "*" always fails to match.
        # Storing such a response leads to a deserialization warning
        # during cache lookup and is not allowed to ever be served,
        # so storing it can be avoided.
        if "*" in response_headers.get("vary", ""):
            logger.debug('Response header has "Vary: *"')
            return

        # If we've been given an etag, then keep the response
        if self.cache_etags and "etag" in response_headers:
            logger.debug("Caching due to etag")
            self.cache.set(
                cache_url, self.serializer.dumps(request, response, body=body)
            )

        # Add to the cache any 301s. We do this before looking that
        # the Date headers.
        elif response.status == 301:
            logger.debug("Caching permanant redirect")
            self.cache.set(cache_url, self.serializer.dumps(request, response))

        # Add to the cache if the response headers demand it. If there
        # is no date header then we can't do anything about expiring
        # the cache.
        elif "date" in response_headers:
            # cache when there is a max-age > 0
            if "max-age" in cc and cc["max-age"] > 0:
                logger.debug("Caching b/c date exists and max-age > 0")
                self.cache.set(
                    cache_url, self.serializer.dumps(request, response, body=body)
                )

            # If the request can expire, it means we should cache it
            # in the meantime.
            elif "expires" in response_headers:
                if response_headers["expires"]:
                    logger.debug("Caching b/c of expires header")
                    self.cache.set(
                        cache_url, self.serializer.dumps(request, response, body=body)
                    )

    def update_cached_response(self, request, response):
        """On a 304 we will get a new set of headers that we want to
        update our cached value with, assuming we have one.

        This should only ever be called when we've sent an ETag and
        gotten a 304 as the response.

        :returns: the refreshed cached response (status forced to 200), or
            ``response`` itself when nothing is cached for the request URL.
        """
        cache_url = self.cache_url(request.url)

        cached_response = self.serializer.loads(request, self.cache.get(cache_url))

        if not cached_response:
            # we didn't have a cached response
            return response

        # Lets update our headers with the headers from the new request:
        # http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
        #
        # The server isn't supposed to send headers that would make
        # the cached body invalid. But... just in case, we'll be sure
        # to strip out ones we know that might be problmatic due to
        # typical assumptions.
        excluded_headers = ["content-length"]

        cached_response.headers.update(
            {
                k: v
                for k, v in response.headers.items()
                if k.lower() not in excluded_headers
            }
        )

        # we want a 200 b/c we have content via the cache
        cached_response.status = 200

        # update our cache
        self.cache.set(cache_url, self.serializer.dumps(request, cached_response))

        return cached_response
376 return cached_response