Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/cachecontrol/controller.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:18:57 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d30785e31577 |
---|---|
1 """ | |
2 The httplib2 algorithms ported for use with requests. | |
3 """ | |
4 import logging | |
5 import re | |
6 import calendar | |
7 import time | |
8 from email.utils import parsedate_tz | |
9 | |
10 from requests.structures import CaseInsensitiveDict | |
11 | |
12 from .cache import DictCache | |
13 from .serialize import Serializer | |
14 | |
15 | |
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Regex from Appendix B of RFC 3986: splits a URI into scheme, authority,
# path, query and fragment components.
URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")


def parse_uri(uri):
    """Parses a URI using the regex given in Appendix B of RFC 3986.

    (scheme, authority, path, query, fragment) = parse_uri(uri)
    """
    # The even-numbered groups are the delimiter-bearing wrappers
    # (e.g. "http:" / "//host" / "?q" / "#f"); keep only the inner values.
    scheme, _, authority, path, _, query, _, fragment = URI.match(uri).groups()[1:]
    return (scheme, authority, path, query, fragment)
28 | |
29 | |
class CacheController(object):
    """An interface to see if request should cached or not.

    Implements HTTP freshness/validation caching on top of a pluggable
    ``cache`` store and ``serializer``.
    """

    def __init__(self, cache=None, cache_etags=True, serializer=None):
        # Default to a process-local dict-backed cache and the standard
        # serializer when none are supplied.
        self.cache = cache or DictCache()
        self.cache_etags = cache_etags
        self.serializer = serializer or Serializer()

    @classmethod
    def _urlnorm(cls, uri):
        """Normalize the URL to create a safe key for the cache"""
        (scheme, authority, path, query, fragment) = parse_uri(uri)
        if not scheme or not authority:
            raise Exception("Only absolute URIs are allowed. uri = %s" % uri)

        # Scheme and host are case-insensitive; normalize for stable keys.
        scheme = scheme.lower()
        authority = authority.lower()

        if not path:
            path = "/"

        # Could do syntax based normalization of the URI before
        # computing the digest. See Section 6.2.2 of Std 66.
        request_uri = query and "?".join([path, query]) or path
        defrag_uri = scheme + "://" + authority + request_uri

        return defrag_uri

    @classmethod
    def cache_url(cls, uri):
        """Return the normalized cache key for ``uri``."""
        return cls._urlnorm(uri)

    def parse_cache_control(self, headers):
        """
        Parse the cache control headers returning a dictionary with values
        for the different directives.

        Directives with an argument (e.g. ``max-age=300``) map to their
        argument as a lowercased *string*; bare directives (e.g.
        ``no-cache``) map to the integer ``1``.
        """
        retval = {}

        # ``headers`` may be a plain dict rather than a case-insensitive
        # mapping, so probe the canonical capitalization explicitly.
        cc_header = 'cache-control'
        if 'Cache-Control' in headers:
            cc_header = 'Cache-Control'

        if cc_header in headers:
            parts = headers[cc_header].split(',')
            parts_with_args = [
                tuple([x.strip().lower() for x in part.split("=", 1)])
                for part in parts if -1 != part.find("=")
            ]
            parts_wo_args = [
                (name.strip().lower(), 1)
                for name in parts if -1 == name.find("=")
            ]
            retval = dict(parts_with_args + parts_wo_args)
        return retval

    def cached_request(self, request):
        """
        Return a cached response if it exists in the cache, otherwise
        return False.
        """
        cache_url = self.cache_url(request.url)
        logger.debug('Looking up "%s" in the cache', cache_url)
        cc = self.parse_cache_control(request.headers)

        # Bail out if the request insists on fresh data
        if 'no-cache' in cc:
            logger.debug('Request header has "no-cache", cache bypassed')
            return False

        # BUG FIX: parse_cache_control() returns directive arguments as
        # strings, so the previous test ``cc['max-age'] == 0`` compared
        # '0' to 0 and could never be true -- requests sending
        # "Cache-Control: max-age=0" were wrongly served from the cache.
        # Compare numerically instead (non-numeric values are ignored).
        if 'max-age' in cc:
            try:
                if int(cc['max-age']) == 0:
                    logger.debug('Request header has "max_age" as 0, cache bypassed')
                    return False
            except (ValueError, TypeError):
                pass

        # Request allows serving from the cache, let's see if we find something
        cache_data = self.cache.get(cache_url)
        if cache_data is None:
            logger.debug('No cache entry available')
            return False

        # Check whether it can be deserialized
        resp = self.serializer.loads(request, cache_data)
        if not resp:
            logger.warning('Cache entry deserialization failed, entry ignored')
            return False

        # If we have a cached 301, return it immediately. We don't
        # need to test our response for other headers b/c it is
        # intrinsically "cacheable" as it is Permanent.
        # See:
        #   https://tools.ietf.org/html/rfc7231#section-6.4.2
        #
        # Client can try to refresh the value by repeating the request
        # with cache busting headers as usual (ie no-cache).
        if resp.status == 301:
            msg = ('Returning cached "301 Moved Permanently" response '
                   '(ignoring date and etag information)')
            logger.debug(msg)
            return resp

        headers = CaseInsensitiveDict(resp.headers)
        if not headers or 'date' not in headers:
            if 'etag' not in headers:
                # Without date or etag, the cached response can never be used
                # and should be deleted.
                logger.debug('Purging cached response: no date or etag')
                self.cache.delete(cache_url)
            logger.debug('Ignoring cached response: no date')
            return False

        now = time.time()
        # NOTE(review): parsedate_tz() returns None for a malformed Date
        # header, which would make timegm() raise -- confirm upstream
        # callers guarantee a well-formed Date before hardening here.
        date = calendar.timegm(
            parsedate_tz(headers['date'])
        )
        current_age = max(0, now - date)
        logger.debug('Current age based on date: %i', current_age)

        # TODO: There is an assumption that the result will be a
        # urllib3 response object. This may not be best since we
        # could probably avoid instantiating or constructing the
        # response until we know we need it.
        resp_cc = self.parse_cache_control(headers)

        # determine freshness
        freshness_lifetime = 0

        # Check the max-age pragma in the cache control header
        if 'max-age' in resp_cc and resp_cc['max-age'].isdigit():
            freshness_lifetime = int(resp_cc['max-age'])
            logger.debug('Freshness lifetime from max-age: %i',
                         freshness_lifetime)

        # If there isn't a max-age, check for an expires header
        elif 'expires' in headers:
            expires = parsedate_tz(headers['expires'])
            if expires is not None:
                expire_time = calendar.timegm(expires) - date
                freshness_lifetime = max(0, expire_time)
                logger.debug("Freshness lifetime from expires: %i",
                             freshness_lifetime)

        # Determine if we are setting freshness limit in the
        # request. Note, this overrides what was in the response.
        if 'max-age' in cc:
            try:
                freshness_lifetime = int(cc['max-age'])
                logger.debug('Freshness lifetime from request max-age: %i',
                             freshness_lifetime)
            except ValueError:
                freshness_lifetime = 0

        if 'min-fresh' in cc:
            try:
                min_fresh = int(cc['min-fresh'])
            except ValueError:
                min_fresh = 0
            # adjust our current age by our min fresh
            current_age += min_fresh
            logger.debug('Adjusted current age from min-fresh: %i',
                         current_age)

        # Return entry if it is fresh enough
        if freshness_lifetime > current_age:
            logger.debug('The response is "fresh", returning cached response')
            logger.debug('%i > %i', freshness_lifetime, current_age)
            return resp

        # we're not fresh. If we don't have an Etag, clear it out
        if 'etag' not in headers:
            logger.debug(
                'The cached response is "stale" with no etag, purging'
            )
            self.cache.delete(cache_url)

        # return the original handler
        return False

    def conditional_headers(self, request):
        """Build validation headers (If-None-Match / If-Modified-Since)
        from a stale cache entry for ``request``, if one exists."""
        cache_url = self.cache_url(request.url)
        resp = self.serializer.loads(request, self.cache.get(cache_url))
        new_headers = {}

        if resp:
            headers = CaseInsensitiveDict(resp.headers)

            if 'etag' in headers:
                new_headers['If-None-Match'] = headers['ETag']

            if 'last-modified' in headers:
                new_headers['If-Modified-Since'] = headers['Last-Modified']

        return new_headers

    def cache_response(self, request, response, body=None):
        """
        Algorithm for caching requests.

        This assumes a requests Response object.
        """
        # From httplib2: Don't cache 206's since we aren't going to
        # handle byte range requests
        cacheable_status_codes = [200, 203, 300, 301]
        if response.status not in cacheable_status_codes:
            logger.debug(
                'Status code %s not in %s',
                response.status,
                cacheable_status_codes
            )
            return

        response_headers = CaseInsensitiveDict(response.headers)

        # If we've been given a body, our response has a Content-Length, that
        # Content-Length is valid then we can check to see if the body we've
        # been given matches the expected size, and if it doesn't we'll just
        # skip trying to cache it.
        if (body is not None and
                "content-length" in response_headers and
                response_headers["content-length"].isdigit() and
                int(response_headers["content-length"]) != len(body)):
            return

        cc_req = self.parse_cache_control(request.headers)
        cc = self.parse_cache_control(response_headers)

        cache_url = self.cache_url(request.url)
        logger.debug('Updating cache with response from "%s"', cache_url)

        # Delete it from the cache if we happen to have it stored there
        no_store = False
        if cc.get('no-store'):
            no_store = True
            logger.debug('Response header has "no-store"')
        if cc_req.get('no-store'):
            no_store = True
            logger.debug('Request header has "no-store"')
        if no_store and self.cache.get(cache_url):
            logger.debug('Purging existing cache entry to honor "no-store"')
            self.cache.delete(cache_url)

        # If we've been given an etag, then keep the response
        if self.cache_etags and 'etag' in response_headers:
            logger.debug('Caching due to etag')
            self.cache.set(
                cache_url,
                self.serializer.dumps(request, response, body=body),
            )

        # Add to the cache any 301s. We do this before looking that
        # the Date headers.
        elif response.status == 301:
            # (fixed misspelled log message: "permanant" -> "permanent")
            logger.debug('Caching permanent redirect')
            self.cache.set(
                cache_url,
                self.serializer.dumps(request, response)
            )

        # Add to the cache if the response headers demand it. If there
        # is no date header then we can't do anything about expiring
        # the cache.
        elif 'date' in response_headers:
            # cache when there is a max-age > 0
            if cc and cc.get('max-age'):
                if cc['max-age'].isdigit() and int(cc['max-age']) > 0:
                    logger.debug('Caching b/c date exists and max-age > 0')
                    self.cache.set(
                        cache_url,
                        self.serializer.dumps(request, response, body=body),
                    )

            # If the request can expire, it means we should cache it
            # in the meantime.
            elif 'expires' in response_headers:
                if response_headers['expires']:
                    logger.debug('Caching b/c of expires header')
                    self.cache.set(
                        cache_url,
                        self.serializer.dumps(request, response, body=body),
                    )

    def update_cached_response(self, request, response):
        """On a 304 we will get a new set of headers that we want to
        update our cached value with, assuming we have one.

        This should only ever be called when we've sent an ETag and
        gotten a 304 as the response.
        """
        cache_url = self.cache_url(request.url)

        cached_response = self.serializer.loads(
            request,
            self.cache.get(cache_url)
        )

        if not cached_response:
            # we didn't have a cached response
            return response

        # Lets update our headers with the headers from the new request:
        # http://tools.ietf.org/html/draft-ietf-httpbis-p4-conditional-26#section-4.1
        #
        # The server isn't supposed to send headers that would make
        # the cached body invalid. But... just in case, we'll be sure
        # to strip out ones we know that might be problematic due to
        # typical assumptions.
        excluded_headers = [
            "content-length",
        ]

        cached_response.headers.update(
            dict((k, v) for k, v in response.headers.items()
                 if k.lower() not in excluded_headers)
        )

        # we want a 200 b/c we have content via the cache
        cached_response.status = 200

        # update our cache
        self.cache.set(
            cache_url,
            self.serializer.dumps(request, cached_response),
        )

        return cached_response