env/lib/python3.9/site-packages/cachecontrol/controller.py @ 0:4f3585e2f14b (draft, default, tip)

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
| author | shellac |
|---|---|
| date | Mon, 22 Mar 2021 18:12:50 +0000 |
| parents | (none) |
| children | (none) |
| 1 """ | |
| 2 The httplib2 algorithms ported for use with requests. | |
| 3 """ | |
| 4 import logging | |
| 5 import re | |
| 6 import calendar | |
| 7 import time | |
| 8 from email.utils import parsedate_tz | |
| 9 | |
| 10 from requests.structures import CaseInsensitiveDict | |
| 11 | |
| 12 from .cache import DictCache | |
| 13 from .serialize import Serializer | |
| 14 | |
| 15 | |
| 16 logger = logging.getLogger(__name__) | |
| 17 | |
| 18 URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") | |
| 19 | |
| 20 | |
| 21 def parse_uri(uri): | |
| 22 """Parses a URI using the regex given in Appendix B of RFC 3986. | |
| 23 | |
| 24 (scheme, authority, path, query, fragment) = parse_uri(uri) | |
| 25 """ | |
| 26 groups = URI.match(uri).groups() | |
| 27 return (groups[1], groups[3], groups[4], groups[6], groups[8]) | |
| 28 | |
| 29 | |
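As a quick illustration (the URL here is an arbitrary example, not from the source), the five tuple slots map onto the RFC 3986 capture groups like so:

```python
from cachecontrol.controller import parse_uri

scheme, authority, path, query, fragment = parse_uri(
    "https://example.com/a/b?x=1#top"
)
# scheme == "https", authority == "example.com", path == "/a/b",
# query == "x=1", fragment == "top"
```

The `CacheController` class, which hosts the caching logic, follows.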
```python
class CacheController(object):
    """An interface to see if a request should be cached or not.
    """
    def __init__(self, cache=None, cache_etags=True, serializer=None):
        self.cache = cache or DictCache()
        self.cache_etags = cache_etags
        self.serializer = serializer or Serializer()
```
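Construction is straightforward; omitted arguments fall back to an in-memory `DictCache` and the default `Serializer`. A minimal sketch:

```python
from cachecontrol.cache import DictCache
from cachecontrol.controller import CacheController

# Explicit arguments shown for clarity; both match the defaults above.
controller = CacheController(cache=DictCache(), cache_etags=True)
```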
```python
    @classmethod
    def _urlnorm(cls, uri):
        """Normalize the URL to create a safe key for the cache"""
        (scheme, authority, path, query, fragment) = parse_uri(uri)
        if not scheme or not authority:
            raise Exception("Only absolute URIs are allowed. uri = %s" % uri)

        scheme = scheme.lower()
        authority = authority.lower()

        if not path:
            path = "/"

        # Could do syntax based normalization of the URI before
        # computing the digest. See Section 6.2.2 of Std 66.
        request_uri = "?".join([path, query]) if query else path
        defrag_uri = scheme + "://" + authority + request_uri

        return defrag_uri

    @classmethod
    def cache_url(cls, uri):
        return cls._urlnorm(uri)
```
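The normalization lowercases the scheme and authority, drops the fragment, and defaults an empty path to `/`, so equivalent spellings of a URL share one cache key. For example (an illustrative URL, not from the source):

```python
from cachecontrol.controller import CacheController

CacheController.cache_url("HTTP://Example.COM?q=1#frag")
# -> "http://example.com/?q=1"
```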
```python
    def parse_cache_control(self, headers):
        """
        Parse the cache control headers returning a dictionary with values
        for the different directives.
        """
        retval = {}

        cc_header = 'cache-control'
        if 'Cache-Control' in headers:
            cc_header = 'Cache-Control'

        if cc_header in headers:
            parts = headers[cc_header].split(',')
            parts_with_args = [
                tuple([x.strip().lower() for x in part.split("=", 1)])
                for part in parts if -1 != part.find("=")
            ]
            parts_wo_args = [
                (name.strip().lower(), 1)
                for name in parts if -1 == name.find("=")
            ]
            retval = dict(parts_with_args + parts_wo_args)
        return retval
```
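Directives with an argument keep their value as a string, while bare directives map to the integer 1. The string values matter below: `cached_request` must compare `max-age` against `'0'`, not `0`. With a hypothetical header:

```python
from cachecontrol.controller import CacheController

cc = CacheController().parse_cache_control(
    {"Cache-Control": "max-age=3600, no-store, must-revalidate"}
)
# cc == {"max-age": "3600", "no-store": 1, "must-revalidate": 1}
```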
```python
    def cached_request(self, request):
        """
        Return a cached response if it exists in the cache, otherwise
        return False.
        """
        cache_url = self.cache_url(request.url)
        logger.debug('Looking up "%s" in the cache', cache_url)
        cc = self.parse_cache_control(request.headers)

        # Bail out if the request insists on fresh data
        if 'no-cache' in cc:
            logger.debug('Request header has "no-cache", cache bypassed')
            return False

        # Note: parse_cache_control leaves argument values as strings,
        # so compare against '0' rather than the integer 0.
        if 'max-age' in cc and cc['max-age'] == '0':
            logger.debug('Request header has "max-age" as 0, cache bypassed')
            return False

        # The request allows serving from the cache; see if we have an entry
        cache_data = self.cache.get(cache_url)
        if cache_data is None:
            logger.debug('No cache entry available')
            return False

        # Check whether it can be deserialized
        resp = self.serializer.loads(request, cache_data)
        if not resp:
            logger.warning('Cache entry deserialization failed, entry ignored')
            return False

        # If we have a cached 301, return it immediately. We don't
        # need to test our response for other headers b/c it is
        # intrinsically "cacheable" as it is Permanent.
        # See:
        #   https://tools.ietf.org/html/rfc7231#section-6.4.2
        #
        # The client can try to refresh the value by repeating the request
        # with cache-busting headers as usual (i.e. no-cache).
        if resp.status == 301:
            msg = ('Returning cached "301 Moved Permanently" response '
                   '(ignoring date and etag information)')
            logger.debug(msg)
            return resp

        headers = CaseInsensitiveDict(resp.headers)
        if not headers or 'date' not in headers:
            if 'etag' not in headers:
                # Without date or etag, the cached response can never be used
                # and should be deleted.
                logger.debug('Purging cached response: no date or etag')
                self.cache.delete(cache_url)
            logger.debug('Ignoring cached response: no date')
            return False

        now = time.time()
        date = calendar.timegm(
            parsedate_tz(headers['date'])
        )
        current_age = max(0, now - date)
        logger.debug('Current age based on date: %i', current_age)

        # TODO: There is an assumption that the result will be a
        # urllib3 response object. This may not be best since we
        # could probably avoid instantiating or constructing the
        # response until we know we need it.
        resp_cc = self.parse_cache_control(headers)

        # determine freshness
        freshness_lifetime = 0

        # Check the max-age pragma in the cache control header
        if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
            freshness_lifetime = int(resp_cc['max-age'])
            logger.debug('Freshness lifetime from max-age: %i',
                         freshness_lifetime)

        # If there isn't a max-age, check for an expires header
        elif 'expires' in headers:
            expires = parsedate_tz(headers['expires'])
            if expires is not None:
                expire_time = calendar.timegm(expires) - date
                freshness_lifetime = max(0, expire_time)
                logger.debug("Freshness lifetime from expires: %i",
                             freshness_lifetime)

        # Determine if we are setting the freshness limit in the
        # request. Note, this overrides what was in the response.
        if 'max-age' in cc:
            try:
                freshness_lifetime = int(cc['max-age'])
                logger.debug('Freshness lifetime from request max-age: %i',
                             freshness_lifetime)
            except ValueError:
                freshness_lifetime = 0

        if 'min-fresh' in cc:
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            # adjust our current age by our min fresh
            current_age += min_fresh
            logger.debug('Adjusted current age from min-fresh: %i',
                         current_age)

        # Return the entry if it is fresh enough
        if freshness_lifetime > current_age:
            logger.debug('The response is "fresh", returning cached response')
            logger.debug('%i > %i', freshness_lifetime, current_age)
            return resp

        # We're not fresh. If we don't have an etag, purge the entry.
        if 'etag' not in headers:
            logger.debug(
                'The cached response is "stale" with no etag, purging'
            )
            self.cache.delete(cache_url)

        # return the original handler
        return False
```
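To make the freshness arithmetic concrete, here is the same computation with hypothetical numbers: a response served with `max-age=3600`, a `Date` header 600 seconds old, and a request carrying `min-fresh=300`:

```python
import calendar
from email.utils import parsedate_tz

# Date header of the cached response; pretend 600 seconds have passed.
date = calendar.timegm(parsedate_tz("Mon, 22 Mar 2021 18:02:50 +0000"))
now = date + 600

current_age = max(0, now - date)              # 600
freshness_lifetime = 3600                     # "Cache-Control: max-age=3600"
current_age += 300                            # request said min-fresh=300

is_fresh = freshness_lifetime > current_age   # 3600 > 900 -> True
```

When the entry is stale but carries an etag, the controller keeps it so the next request can revalidate it with the conditional headers built below.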
```python
    def conditional_headers(self, request):
        cache_url = self.cache_url(request.url)
        resp = self.serializer.loads(request, self.cache.get(cache_url))
        new_headers = {}

        if resp:
            headers = CaseInsensitiveDict(resp.headers)

            if 'etag' in headers:
                new_headers['If-None-Match'] = headers['ETag']

            if 'last-modified' in headers:
                new_headers['If-Modified-Since'] = headers['Last-Modified']

        return new_headers
```
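A cached entry that carried validators thus produces the matching conditional request headers; with hypothetical header values:

```python
# A cached response with:
#   ETag: "abc123"
#   Last-Modified: Mon, 22 Mar 2021 18:00:00 GMT
# yields:
{
    "If-None-Match": '"abc123"',
    "If-Modified-Since": "Mon, 22 Mar 2021 18:00:00 GMT",
}
```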
```python
    def cache_response(self, request, response, body=None):
        """
        Algorithm for caching requests.

        This assumes a requests Response object.
        """
        # From httplib2: Don't cache 206's since we aren't going to
        # handle byte range requests
        cacheable_status_codes = [200, 203, 300, 301]
        if response.status not in cacheable_status_codes:
            logger.debug(
                'Status code %s not in %s',
                response.status,
                cacheable_status_codes
            )
            return

        response_headers = CaseInsensitiveDict(response.headers)

        # If we've been given a body, our response has a Content-Length,
        # and that Content-Length is valid, then check whether the body
        # we've been given matches the expected size; if it doesn't,
        # skip caching.
        if (body is not None and
                "content-length" in response_headers and
                response_headers["content-length"].isdigit() and
                int(response_headers["content-length"]) != len(body)):
            return

        cc_req = self.parse_cache_control(request.headers)
        cc = self.parse_cache_control(response_headers)

        cache_url = self.cache_url(request.url)
        logger.debug('Updating cache with response from "%s"', cache_url)

        # Delete it from the cache if we happen to have it stored there
        no_store = False
        if cc.get('no-store'):
            no_store = True
            logger.debug('Response header has "no-store"')
        if cc_req.get('no-store'):
            no_store = True
            logger.debug('Request header has "no-store"')
        if no_store and self.cache.get(cache_url):
            logger.debug('Purging existing cache entry to honor "no-store"')
            self.cache.delete(cache_url)

        # If we've been given an etag, then keep the response
        if self.cache_etags and 'etag' in response_headers:
            logger.debug('Caching due to etag')
            self.cache.set(
                cache_url,
                self.serializer.dumps(request, response, body=body),
            )

        # Add any 301s to the cache. We do this before checking the
        # Date header.
        elif response.status == 301:
            logger.debug('Caching permanent redirect')
            self.cache.set(
                cache_url,
                self.serializer.dumps(request, response)
            )

        # Add to the cache if the response headers demand it. If there
        # is no date header then we can't do anything about expiring
        # the cache.
        elif 'date' in response_headers:
            # cache when there is a max-age > 0
            if cc and cc.get('max-age'):
                if cc['max-age'].isdigit() and int(cc['max-age']) > 0:
                    logger.debug('Caching b/c date exists and max-age > 0')
                    self.cache.set(
                        cache_url,
                        self.serializer.dumps(request, response, body=body),
                    )

            # If the response can expire, it means we should cache it
            # in the meantime.
            elif 'expires' in response_headers:
                if response_headers['expires']:
                    logger.debug('Caching b/c of expires header')
                    self.cache.set(
                        cache_url,
                        self.serializer.dumps(request, response, body=body),
                    )
```
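A minimal sketch of the happy path through `cache_response`, using hypothetical stand-in objects (the stub serializer and the `SimpleNamespace` request/response are test doubles, not part of the library):

```python
from types import SimpleNamespace

from cachecontrol.cache import DictCache
from cachecontrol.controller import CacheController


class StubSerializer(object):
    """Test double: store the raw response instead of serializing it."""
    def dumps(self, request, response, body=None):
        return response

    def loads(self, request, data):
        return data


controller = CacheController(cache=DictCache(), serializer=StubSerializer())
request = SimpleNamespace(url="http://example.com/data", headers={})
response = SimpleNamespace(
    status=200,
    headers={"Date": "Mon, 22 Mar 2021 18:12:50 +0000",
             "Cache-Control": "max-age=3600"},
)

# 200 + Date + max-age > 0 takes the 'date' branch and stores the entry.
controller.cache_response(request, response)
assert controller.cache.get("http://example.com/data") is response
```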
```python
    def update_cached_response(self, request, response):
        """On a 304 we will get a new set of headers that we want to
        update our cached value with, assuming we have one.

        This should only ever be called when we've sent an ETag and
        gotten a 304 as the response.
        """
        cache_url = self.cache_url(request.url)

        cached_response = self.serializer.loads(
            request,
            self.cache.get(cache_url)
        )

        if not cached_response:
            # we didn't have a cached response
            return response

        # Let's update our headers with the headers from the 304 response:
        # http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
        #
        # The server isn't supposed to send headers that would make
        # the cached body invalid. But... just in case, we'll be sure
        # to strip out ones we know might be problematic due to
        # typical assumptions.
        excluded_headers = [
            "content-length",
        ]

        cached_response.headers.update(
            dict((k, v) for k, v in response.headers.items()
                 if k.lower() not in excluded_headers)
        )

        # we want a 200 b/c we have content via the cache
        cached_response.status = 200

        # update our cache
        self.cache.set(
            cache_url,
            self.serializer.dumps(request, cached_response),
        )

        return cached_response
```
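In normal use this controller sits behind the library's transport adapter rather than being called directly; the typical entry point is the `CacheControl` session wrapper:

```python
import requests
from cachecontrol import CacheControl

sess = CacheControl(requests.Session())

# The first GET is stored via cache_response(); a repeat within the
# freshness lifetime is answered by cached_request(), and a stale entry
# with an ETag is revalidated using conditional_headers().
resp = sess.get("http://example.com/")
resp = sess.get("http://example.com/")
```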
