Mercurial > repos > shellac > guppy_basecaller
env/lib/python3.7/site-packages/boto/s3/key.py @ 0:26e78fe6e8c4 (draft)
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author: shellac
date: Sat, 02 May 2020 07:14:21 -0400
parents: (none)
children: (none)
1 # Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/ | |
2 # Copyright (c) 2011, Nexenta Systems Inc. | |
3 # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved | |
4 # | |
5 # Permission is hereby granted, free of charge, to any person obtaining a | |
6 # copy of this software and associated documentation files (the | |
7 # "Software"), to deal in the Software without restriction, including | |
8 # without limitation the rights to use, copy, modify, merge, publish, dis- | |
9 # tribute, sublicense, and/or sell copies of the Software, and to permit | |
10 # persons to whom the Software is furnished to do so, subject to the fol- | |
11 # lowing conditions: | |
12 # | |
13 # The above copyright notice and this permission notice shall be included | |
14 # in all copies or substantial portions of the Software. | |
15 # | |
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |
17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | |
18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | |
19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
22 # IN THE SOFTWARE. | |
23 import email.utils | |
24 import errno | |
25 import hashlib | |
26 import mimetypes | |
27 import os | |
28 import re | |
29 import base64 | |
30 import binascii | |
31 import math | |
32 from hashlib import md5 | |
33 import boto.utils | |
34 from boto.compat import BytesIO, six, urllib, encodebytes | |
35 | |
36 from boto.exception import BotoClientError | |
37 from boto.exception import StorageDataError | |
38 from boto.exception import PleaseRetryException | |
39 from boto.provider import Provider | |
40 from boto.s3.keyfile import KeyFile | |
41 from boto.s3.user import User | |
42 from boto import UserAgent | |
43 from boto.utils import compute_md5, compute_hash | |
44 from boto.utils import find_matching_headers | |
45 from boto.utils import merge_headers_by_name | |
46 | |
47 | |
48 class Key(object): | |
49 """ | |
50 Represents a key (object) in an S3 bucket. | |
51 | |
52 :ivar bucket: The parent :class:`boto.s3.bucket.Bucket`. | |
53 :ivar name: The name of this Key object. | |
54 :ivar metadata: A dictionary containing user metadata that you | |
55 wish to store with the object or that has been retrieved from | |
56 an existing object. | |
57 :ivar cache_control: The value of the `Cache-Control` HTTP header. | |
58 :ivar content_type: The value of the `Content-Type` HTTP header. | |
59 :ivar content_encoding: The value of the `Content-Encoding` HTTP header. | |
60 :ivar content_disposition: The value of the `Content-Disposition` HTTP | |
61 header. | |
62 :ivar content_language: The value of the `Content-Language` HTTP header. | |
63 :ivar etag: The `etag` associated with this object. | |
64 :ivar last_modified: The string timestamp representing the last | |
65 time this object was modified in S3. | |
66 :ivar owner: The ID of the owner of this object. | |
67 :ivar storage_class: The storage class of the object. Currently, one of: | |
68 STANDARD | REDUCED_REDUNDANCY | GLACIER | |
69 :ivar md5: The MD5 hash of the contents of the object. | |
70 :ivar size: The size, in bytes, of the object. | |
71 :ivar version_id: The version ID of this object, if it is a versioned | |
72 object. | |
73 :ivar encrypted: Whether the object is encrypted while at rest on | |
74 the server. | |
75 """ | |
76 | |
77 DefaultContentType = 'application/octet-stream' | |
78 | |
79 RestoreBody = """<?xml version="1.0" encoding="UTF-8"?> | |
80 <RestoreRequest xmlns="http://s3.amazonaws.com/doc/2006-03-01"> | |
81 <Days>%s</Days> | |
82 </RestoreRequest>""" | |
83 | |
84 | |
85 BufferSize = boto.config.getint('Boto', 'key_buffer_size', 8192) | |
86 | |
87 # The object metadata fields a user can set, other than custom metadata | |
88 # fields (i.e., those beginning with a provider-specific prefix like | |
89 # x-amz-meta). | |
90 base_user_settable_fields = set(["cache-control", "content-disposition", | |
91 "content-encoding", "content-language", | |
92 "content-md5", "content-type", | |
93 "x-robots-tag", "expires"]) | |
94 _underscore_base_user_settable_fields = set() | |
95 for f in base_user_settable_fields: | |
96 _underscore_base_user_settable_fields.add(f.replace('-', '_')) | |
97 # Metadata fields, whether user-settable or not, other than custom | |
98 # metadata fields (i.e., those beginning with a provider specific prefix | |
99 # like x-amz-meta). | |
100 base_fields = (base_user_settable_fields | | |
101 set(["last-modified", "content-length", "date", "etag"])) | |
102 | |
103 | |
104 | |
105 def __init__(self, bucket=None, name=None): | |
106 self.bucket = bucket | |
107 self.name = name | |
108 self.metadata = {} | |
109 self.cache_control = None | |
110 self.content_type = self.DefaultContentType | |
111 self.content_encoding = None | |
112 self.content_disposition = None | |
113 self.content_language = None | |
114 self.filename = None | |
115 self.etag = None | |
116 self.is_latest = False | |
117 self.last_modified = None | |
118 self.owner = None | |
119 self._storage_class = None | |
120 self.path = None | |
121 self.resp = None | |
122 self.mode = None | |
123 self.size = None | |
124 self.version_id = None | |
125 self.source_version_id = None | |
126 self.delete_marker = False | |
127 self.encrypted = None | |
128 # While a restore of this object is in progress, this attribute will be | |
129 # set to True. Once the restore has completed, it will be set to False; | |
130 # otherwise this value will be None. When the restore is complete | |
131 # (ongoing_restore = False), the expiry_date will be populated with the | |
132 # expiry date of the restored object. | |
133 self.ongoing_restore = None | |
134 self.expiry_date = None | |
135 self.local_hashes = {} | |
136 | |
137 def __repr__(self): | |
138 if self.bucket: | |
139 name = u'<Key: %s,%s>' % (self.bucket.name, self.name) | |
140 else: | |
141 name = u'<Key: None,%s>' % self.name | |
142 | |
143 # Encode to bytes for Python 2 to prevent display decoding issues | |
144 if not isinstance(name, str): | |
145 name = name.encode('utf-8') | |
146 | |
147 return name | |
148 | |
149 def __iter__(self): | |
150 return self | |
151 | |
152 @property | |
153 def provider(self): | |
154 provider = None | |
155 if self.bucket and self.bucket.connection: | |
156 provider = self.bucket.connection.provider | |
157 return provider | |
158 | |
159 def _get_key(self): | |
160 return self.name | |
161 | |
162 def _set_key(self, value): | |
163 self.name = value | |
164 | |
165 key = property(_get_key, _set_key) | |
166 | |
167 def _get_md5(self): | |
168 if 'md5' in self.local_hashes and self.local_hashes['md5']: | |
169 return binascii.b2a_hex(self.local_hashes['md5']) | |
170 | |
171 def _set_md5(self, value): | |
172 if value: | |
173 self.local_hashes['md5'] = binascii.a2b_hex(value) | |
174 elif 'md5' in self.local_hashes: | |
175 self.local_hashes.pop('md5', None) | |
176 | |
177 md5 = property(_get_md5, _set_md5) | |
178 | |
179 def _get_base64md5(self): | |
180 if 'md5' in self.local_hashes and self.local_hashes['md5']: | |
181 md5 = self.local_hashes['md5'] | |
182 if not isinstance(md5, bytes): | |
183 md5 = md5.encode('utf-8') | |
184 return binascii.b2a_base64(md5).decode('utf-8').rstrip('\n') | |
185 | |
186 def _set_base64md5(self, value): | |
187 if value: | |
188 if not isinstance(value, six.string_types): | |
189 value = value.decode('utf-8') | |
190 self.local_hashes['md5'] = binascii.a2b_base64(value) | |
191 elif 'md5' in self.local_hashes: | |
192 del self.local_hashes['md5'] | |
193 | |
194 base64md5 = property(_get_base64md5, _set_base64md5) | |
195 | |
196 def _get_storage_class(self): | |
197 if self._storage_class is None and self.bucket: | |
198 # Attempt to fetch storage class | |
199 list_items = list(self.bucket.list(self.name.encode('utf-8'))) | |
200 if len(list_items) and getattr(list_items[0], '_storage_class', | |
201 None): | |
202 self._storage_class = list_items[0]._storage_class | |
203 else: | |
204 # Key is not yet saved? Just use default... | |
205 self._storage_class = 'STANDARD' | |
206 | |
207 return self._storage_class | |
208 | |
209 def _set_storage_class(self, value): | |
210 self._storage_class = value | |
211 | |
212 storage_class = property(_get_storage_class, _set_storage_class) | |
213 | |
214 def get_md5_from_hexdigest(self, md5_hexdigest): | |
215 """ | |
216 A utility function to create the 2-tuple (md5hexdigest, base64md5) | |
217 from just having a precalculated md5_hexdigest. | |
218 """ | |
219 digest = binascii.unhexlify(md5_hexdigest) | |
220 base64md5 = encodebytes(digest) | |
221 if base64md5[-1] == '\n': | |
222 base64md5 = base64md5[0:-1] | |
223 return (md5_hexdigest, base64md5) | |
224 | |
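# Editorial note (not part of the original boto source): a minimal sketch of
# the hex/base64 digest pair that get_md5_from_hexdigest() returns; the
# base64 form is what a Content-MD5 header expects.
#
#   import hashlib, binascii, base64
#   hexdigest = hashlib.md5(b'payload').hexdigest()
#   b64digest = base64.b64encode(binascii.unhexlify(hexdigest))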
225 def handle_encryption_headers(self, resp): | |
226 provider = self.bucket.connection.provider | |
227 if provider.server_side_encryption_header: | |
228 self.encrypted = resp.getheader( | |
229 provider.server_side_encryption_header, None) | |
230 else: | |
231 self.encrypted = None | |
232 | |
233 def handle_storage_class_header(self, resp): | |
234 provider = self.bucket.connection.provider | |
235 if provider.storage_class_header: | |
236 self._storage_class = resp.getheader( | |
237 provider.storage_class_header, None) | |
238 if (self._storage_class is None and | |
239 provider.get_provider_name() == 'aws'): | |
240 # S3 docs for HEAD object requests say S3 will return this | |
241 # header for all objects except Standard storage class objects. | |
242 self._storage_class = 'STANDARD' | |
243 | |
244 | |
245 def handle_version_headers(self, resp, force=False): | |
246 provider = self.bucket.connection.provider | |
247 # If the Key object already has a version_id attribute value, it | |
248 # means that it represents an explicit version and the user is | |
249 # doing a get_contents_*(version_id=<foo>) to retrieve another | |
250 # version of the Key. In that case, we don't really want to | |
251 # overwrite the version_id in this Key object. Comprende? | |
252 if self.version_id is None or force: | |
253 self.version_id = resp.getheader(provider.version_id, None) | |
254 self.source_version_id = resp.getheader(provider.copy_source_version_id, | |
255 None) | |
256 if resp.getheader(provider.delete_marker, 'false') == 'true': | |
257 self.delete_marker = True | |
258 else: | |
259 self.delete_marker = False | |
260 | |
261 def handle_restore_headers(self, response): | |
262 provider = self.bucket.connection.provider | |
263 header = response.getheader(provider.restore_header) | |
264 if header is None: | |
265 return | |
266 parts = header.split(',', 1) | |
267 for part in parts: | |
268 key, val = [i.strip() for i in part.split('=')] | |
269 val = val.replace('"', '') | |
270 if key == 'ongoing-request': | |
271 self.ongoing_restore = True if val.lower() == 'true' else False | |
272 elif key == 'expiry-date': | |
273 self.expiry_date = val | |
274 | |
275 def handle_addl_headers(self, headers): | |
276 """ | |
277 Used by Key subclasses to do additional, provider-specific | |
278 processing of response headers. No-op for this base class. | |
279 """ | |
280 pass | |
281 | |
282 def open_read(self, headers=None, query_args='', | |
283 override_num_retries=None, response_headers=None): | |
284 """ | |
285 Open this key for reading | |
286 | |
287 :type headers: dict | |
288 :param headers: Headers to pass in the web request | |
289 | |
290 :type query_args: string | |
291 :param query_args: Arguments to pass in the query string | |
292 (i.e., 'torrent') | |
293 | |
294 :type override_num_retries: int | |
295 :param override_num_retries: If not None, will override the configured | |
296 num_retries parameter for the underlying GET. | |
297 | |
298 :type response_headers: dict | |
299 :param response_headers: A dictionary containing HTTP | |
300 headers/values that will override any headers associated | |
301 with the stored object in the response. See | |
302 http://goo.gl/EWOPb for details. | |
303 """ | |
304 if self.resp is None: | |
305 self.mode = 'r' | |
306 | |
307 provider = self.bucket.connection.provider | |
308 self.resp = self.bucket.connection.make_request( | |
309 'GET', self.bucket.name, self.name, headers, | |
310 query_args=query_args, | |
311 override_num_retries=override_num_retries) | |
312 if self.resp.status < 199 or self.resp.status > 299: | |
313 body = self.resp.read() | |
314 raise provider.storage_response_error(self.resp.status, | |
315 self.resp.reason, body) | |
316 response_headers = self.resp.msg | |
317 self.metadata = boto.utils.get_aws_metadata(response_headers, | |
318 provider) | |
319 for name, value in response_headers.items(): | |
320 # To get correct size for Range GETs, use Content-Range | |
321 # header if one was returned. If not, use Content-Length | |
322 # header. | |
323 if (name.lower() == 'content-length' and | |
324 'Content-Range' not in response_headers): | |
325 self.size = int(value) | |
326 elif name.lower() == 'content-range': | |
327 end_range = re.sub('.*/(.*)', '\\1', value) | |
328 self.size = int(end_range) | |
329 elif name.lower() in Key.base_fields: | |
330 self.__dict__[name.lower().replace('-', '_')] = value | |
331 self.handle_version_headers(self.resp) | |
332 self.handle_encryption_headers(self.resp) | |
333 self.handle_restore_headers(self.resp) | |
334 self.handle_addl_headers(self.resp.getheaders()) | |
335 | |
336 def open_write(self, headers=None, override_num_retries=None): | |
337 """ | |
338 Open this key for writing. | |
339 Not yet implemented | |
340 | |
341 :type headers: dict | |
342 :param headers: Headers to pass in the write request | |
343 | |
344 :type override_num_retries: int | |
345 :param override_num_retries: If not None, will override the configured | |
346 num_retries parameter for the underlying PUT. | |
347 """ | |
348 raise BotoClientError('Not Implemented') | |
349 | |
350 def open(self, mode='r', headers=None, query_args=None, | |
351 override_num_retries=None): | |
352 if mode == 'r': | |
353 self.mode = 'r' | |
354 self.open_read(headers=headers, query_args=query_args, | |
355 override_num_retries=override_num_retries) | |
356 elif mode == 'w': | |
357 self.mode = 'w' | |
358 self.open_write(headers=headers, | |
359 override_num_retries=override_num_retries) | |
360 else: | |
361 raise BotoClientError('Invalid mode: %s' % mode) | |
362 | |
363 closed = False | |
364 | |
365 def close(self, fast=False): | |
366 """ | |
367 Close this key. | |
368 | |
369 :type fast: bool | |
370 :param fast: True if you want the connection to be closed without first | |
371 reading the content. This should only be used in cases where subsequent | |
372 calls don't need to return the content from the open HTTP connection. | |
373 Note: As explained at | |
374 http://docs.python.org/2/library/httplib.html#httplib.HTTPConnection.getresponse, | |
375 callers must read the whole response before sending a new request to the | |
376 server. Calling Key.close(fast=True) and making a subsequent request to | |
377 the server will work because boto will get an httplib exception and | |
378 close/reopen the connection. | |
379 | |
380 """ | |
381 if self.resp and not fast: | |
382 self.resp.read() | |
383 self.resp = None | |
384 self.mode = None | |
385 self.closed = True | |
386 | |
387 def next(self): | |
388 """ | |
389 By providing a next method, the key object supports use as an iterator. | |
390 For example, you can now say: | |
391 | |
392 for chunk in key: | |
393 fp.write(chunk)  # write each chunk to a file, or process it as needed | |
394 | |
395 All of the HTTP connection stuff is handled for you. | |
396 """ | |
397 self.open_read() | |
398 data = self.resp.read(self.BufferSize) | |
399 if not data: | |
400 self.close() | |
401 raise StopIteration | |
402 return data | |
403 | |
404 # Python 3 iterator support | |
405 __next__ = next | |
406 | |
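# Editorial usage sketch (not part of the original boto source); assumes an
# existing boto.s3.bucket.Bucket instance named `bucket`. Because Key
# implements the iterator protocol, a stored object can be streamed to disk
# in BufferSize chunks:
#
#   key = bucket.get_key('large-object.bin')
#   with open('large-object.bin', 'wb') as local_fp:
#       for chunk in key:
#           local_fp.write(chunk)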
407 def read(self, size=0): | |
408 self.open_read() | |
409 if size == 0: | |
410 data = self.resp.read() | |
411 else: | |
412 data = self.resp.read(size) | |
413 if not data: | |
414 self.close() | |
415 return data | |
416 | |
417 def change_storage_class(self, new_storage_class, dst_bucket=None, | |
418 validate_dst_bucket=True): | |
419 """ | |
420 Change the storage class of an existing key. | |
421 Depending on whether a different destination bucket is supplied | |
422 or not, this will either move the item within the bucket, preserving | |
423 all metadata and ACL info while changing the storage class, or it | |
424 will copy the item to the provided destination bucket, also | |
425 preserving metadata and ACL info. | |
426 | |
427 :type new_storage_class: string | |
428 :param new_storage_class: The new storage class for the Key. | |
429 Possible values are: | |
430 * STANDARD | |
431 * REDUCED_REDUNDANCY | |
432 | |
433 :type dst_bucket: string | |
434 :param dst_bucket: The name of a destination bucket. If not | |
435 provided the current bucket of the key will be used. | |
436 | |
437 :type validate_dst_bucket: bool | |
438 :param validate_dst_bucket: If True, will validate the dst_bucket | |
439 by using an extra list request. | |
440 """ | |
441 bucket_name = dst_bucket or self.bucket.name | |
442 if new_storage_class == 'STANDARD': | |
443 return self.copy(bucket_name, self.name, | |
444 reduced_redundancy=False, preserve_acl=True, | |
445 validate_dst_bucket=validate_dst_bucket) | |
446 elif new_storage_class == 'REDUCED_REDUNDANCY': | |
447 return self.copy(bucket_name, self.name, | |
448 reduced_redundancy=True, preserve_acl=True, | |
449 validate_dst_bucket=validate_dst_bucket) | |
450 else: | |
451 raise BotoClientError('Invalid storage class: %s' % | |
452 new_storage_class) | |
453 | |
454 def copy(self, dst_bucket, dst_key, metadata=None, | |
455 reduced_redundancy=False, preserve_acl=False, | |
456 encrypt_key=False, validate_dst_bucket=True): | |
457 """ | |
458 Copy this Key to another bucket. | |
459 | |
460 :type dst_bucket: string | |
461 :param dst_bucket: The name of the destination bucket | |
462 | |
463 :type dst_key: string | |
464 :param dst_key: The name of the destination key | |
465 | |
466 :type metadata: dict | |
467 :param metadata: Metadata to be associated with new key. If | |
468 metadata is supplied, it will replace the metadata of the | |
469 source key being copied. If no metadata is supplied, the | |
470 source key's metadata will be copied to the new key. | |
471 | |
472 :type reduced_redundancy: bool | |
473 :param reduced_redundancy: If True, this will force the | |
474 storage class of the new Key to be REDUCED_REDUNDANCY | |
475 regardless of the storage class of the key being copied. | |
476 The Reduced Redundancy Storage (RRS) feature of S3 | |
477 provides lower redundancy at lower storage cost. | |
478 | |
479 :type preserve_acl: bool | |
480 :param preserve_acl: If True, the ACL from the source key will | |
481 be copied to the destination key. If False, the | |
482 destination key will have the default ACL. Note that | |
483 preserving the ACL in the new key object will require two | |
484 additional API calls to S3, one to retrieve the current | |
485 ACL and one to set that ACL on the new object. If you | |
486 don't care about the ACL, a value of False will be | |
487 significantly more efficient. | |
488 | |
489 :type encrypt_key: bool | |
490 :param encrypt_key: If True, the new copy of the object will | |
491 be encrypted on the server-side by S3 and will be stored | |
492 in an encrypted form while at rest in S3. | |
493 | |
494 :type validate_dst_bucket: bool | |
495 :param validate_dst_bucket: If True, will validate the dst_bucket | |
496 by using an extra list request. | |
497 | |
498 :rtype: :class:`boto.s3.key.Key` or subclass | |
499 :returns: An instance of the newly created key object | |
500 """ | |
501 dst_bucket = self.bucket.connection.lookup(dst_bucket, | |
502 validate_dst_bucket) | |
503 if reduced_redundancy: | |
504 storage_class = 'REDUCED_REDUNDANCY' | |
505 else: | |
506 storage_class = self.storage_class | |
507 return dst_bucket.copy_key(dst_key, self.bucket.name, | |
508 self.name, metadata, | |
509 storage_class=storage_class, | |
510 preserve_acl=preserve_acl, | |
511 encrypt_key=encrypt_key, | |
512 src_version_id=self.version_id) | |
513 | |
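# Editorial usage sketch (not part of the original boto source); bucket and
# key names are placeholders. Copying a key to another bucket while keeping
# its ACL (note the two extra API calls described for preserve_acl above):
#
#   src = bucket.get_key('reports/2020-05.csv')
#   src.copy('archive-bucket', 'reports/2020-05.csv', preserve_acl=True)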
514 def startElement(self, name, attrs, connection): | |
515 if name == 'Owner': | |
516 self.owner = User(self) | |
517 return self.owner | |
518 else: | |
519 return None | |
520 | |
521 def endElement(self, name, value, connection): | |
522 if name == 'Key': | |
523 self.name = value | |
524 elif name == 'ETag': | |
525 self.etag = value | |
526 elif name == 'IsLatest': | |
527 if value == 'true': | |
528 self.is_latest = True | |
529 else: | |
530 self.is_latest = False | |
531 elif name == 'LastModified': | |
532 self.last_modified = value | |
533 elif name == 'Size': | |
534 self.size = int(value) | |
535 elif name == 'StorageClass': | |
536 self.storage_class = value | |
537 elif name == 'Owner': | |
538 pass | |
539 elif name == 'VersionId': | |
540 self.version_id = value | |
541 else: | |
542 setattr(self, name, value) | |
543 | |
544 def exists(self, headers=None): | |
545 """ | |
546 Returns True if the key exists | |
547 | |
548 :rtype: bool | |
549 :return: Whether the key exists on S3 | |
550 """ | |
551 return bool(self.bucket.lookup(self.name, headers=headers)) | |
552 | |
553 def delete(self, headers=None): | |
554 """ | |
555 Delete this key from S3 | |
556 """ | |
557 return self.bucket.delete_key(self.name, version_id=self.version_id, | |
558 headers=headers) | |
559 | |
560 def get_metadata(self, name): | |
561 return self.metadata.get(name) | |
562 | |
563 def set_metadata(self, name, value): | |
564 # Ensure that metadata that is vital to signing is in the correct | |
565 # case. Applies to ``Content-Type`` & ``Content-MD5``. | |
566 if name.lower() == 'content-type': | |
567 self.metadata['Content-Type'] = value | |
568 elif name.lower() == 'content-md5': | |
569 self.metadata['Content-MD5'] = value | |
570 else: | |
571 self.metadata[name] = value | |
572 if name.lower() in Key.base_user_settable_fields: | |
573 self.__dict__[name.lower().replace('-', '_')] = value | |
574 | |
575 def update_metadata(self, d): | |
576 self.metadata.update(d) | |
577 | |
578 # convenience methods for setting/getting ACL | |
579 def set_acl(self, acl_str, headers=None): | |
580 if self.bucket is not None: | |
581 self.bucket.set_acl(acl_str, self.name, headers=headers) | |
582 | |
583 def get_acl(self, headers=None): | |
584 if self.bucket is not None: | |
585 return self.bucket.get_acl(self.name, headers=headers) | |
586 | |
587 def get_xml_acl(self, headers=None): | |
588 if self.bucket is not None: | |
589 return self.bucket.get_xml_acl(self.name, headers=headers) | |
590 | |
591 def set_xml_acl(self, acl_str, headers=None): | |
592 if self.bucket is not None: | |
593 return self.bucket.set_xml_acl(acl_str, self.name, headers=headers) | |
594 | |
595 def set_canned_acl(self, acl_str, headers=None): | |
596 return self.bucket.set_canned_acl(acl_str, self.name, headers) | |
597 | |
598 def get_redirect(self): | |
599 """Return the redirect location configured for this key. | |
600 | |
601 If no redirect is configured (via set_redirect), then None | |
602 will be returned. | |
603 | |
604 """ | |
605 response = self.bucket.connection.make_request( | |
606 'HEAD', self.bucket.name, self.name) | |
607 if response.status == 200: | |
608 return response.getheader('x-amz-website-redirect-location') | |
609 else: | |
610 raise self.provider.storage_response_error( | |
611 response.status, response.reason, response.read()) | |
612 | |
613 def set_redirect(self, redirect_location, headers=None): | |
614 """Configure this key to redirect to another location. | |
615 | |
616 When the bucket associated with this key is accessed from the website | |
617 endpoint, a 301 redirect will be issued to the specified | |
618 `redirect_location`. | |
619 | |
620 :type redirect_location: string | |
621 :param redirect_location: The location to redirect. | |
622 | |
623 """ | |
624 if headers is None: | |
625 headers = {} | |
626 else: | |
627 headers = headers.copy() | |
628 | |
629 headers['x-amz-website-redirect-location'] = redirect_location | |
630 response = self.bucket.connection.make_request('PUT', self.bucket.name, | |
631 self.name, headers) | |
632 if response.status == 200: | |
633 return True | |
634 else: | |
635 raise self.provider.storage_response_error( | |
636 response.status, response.reason, response.read()) | |
637 | |
638 def make_public(self, headers=None): | |
639 return self.bucket.set_canned_acl('public-read', self.name, headers) | |
640 | |
641 def generate_url(self, expires_in, method='GET', headers=None, | |
642 query_auth=True, force_http=False, response_headers=None, | |
643 expires_in_absolute=False, version_id=None, | |
644 policy=None, reduced_redundancy=False, encrypt_key=False): | |
645 """ | |
646 Generate a URL to access this key. | |
647 | |
648 :type expires_in: int | |
649 :param expires_in: How long the url is valid for, in seconds. | |
650 | |
651 :type method: string | |
652 :param method: The method to use for retrieving the file | |
653 (default is GET). | |
654 | |
655 :type headers: dict | |
656 :param headers: Any headers to pass along in the request. | |
657 | |
658 :type query_auth: bool | |
659 :param query_auth: If True, signs the request in the URL. | |
660 | |
661 :type force_http: bool | |
662 :param force_http: If True, http will be used instead of https. | |
663 | |
664 :type response_headers: dict | |
665 :param response_headers: A dictionary containing HTTP | |
666 headers/values that will override any headers associated | |
667 with the stored object in the response. See | |
668 http://goo.gl/EWOPb for details. | |
669 | |
670 :type expires_in_absolute: bool | |
671 :param expires_in_absolute: If True, expires_in is interpreted as an absolute expiration time (seconds since the epoch) rather than a number of seconds from now. | |
672 | |
673 :type version_id: string | |
674 :param version_id: The version_id of the object to GET. If specified | |
675 this overrides any value in the key. | |
676 | |
677 :type policy: :class:`boto.s3.acl.CannedACLStrings` | |
678 :param policy: A canned ACL policy that will be applied to the | |
679 new key in S3. | |
680 | |
681 :type reduced_redundancy: bool | |
682 :param reduced_redundancy: If True, this will set the storage | |
683 class of the new Key to be REDUCED_REDUNDANCY. The Reduced | |
684 Redundancy Storage (RRS) feature of S3 provides lower | |
685 redundancy at lower storage cost. | |
686 | |
687 :type encrypt_key: bool | |
688 :param encrypt_key: If True, the new copy of the object will | |
689 be encrypted on the server-side by S3 and will be stored | |
690 in an encrypted form while at rest in S3. | |
691 | |
692 :rtype: string | |
693 :return: The URL to access the key | |
694 """ | |
695 provider = self.bucket.connection.provider | |
696 version_id = version_id or self.version_id | |
697 if headers is None: | |
698 headers = {} | |
699 else: | |
700 headers = headers.copy() | |
701 | |
702 # add headers accordingly (usually PUT case) | |
703 if policy: | |
704 headers[provider.acl_header] = policy | |
705 if reduced_redundancy: | |
706 self.storage_class = 'REDUCED_REDUNDANCY' | |
707 if provider.storage_class_header: | |
708 headers[provider.storage_class_header] = self.storage_class | |
709 if encrypt_key: | |
710 headers[provider.server_side_encryption_header] = 'AES256' | |
711 headers = boto.utils.merge_meta(headers, self.metadata, provider) | |
712 | |
713 return self.bucket.connection.generate_url(expires_in, method, | |
714 self.bucket.name, self.name, | |
715 headers, query_auth, | |
716 force_http, | |
717 response_headers, | |
718 expires_in_absolute, | |
719 version_id) | |
720 | |
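# Editorial usage sketch (not part of the original boto source); names are
# placeholders. Generating a query-authenticated GET URL that stays valid
# for one hour:
#
#   key = bucket.get_key('private/report.pdf')
#   url = key.generate_url(expires_in=3600, method='GET')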
721 def send_file(self, fp, headers=None, cb=None, num_cb=10, | |
722 query_args=None, chunked_transfer=False, size=None): | |
723 """ | |
724 Upload a file to a key into a bucket on S3. | |
725 | |
726 :type fp: file | |
727 :param fp: The file pointer to upload. The file pointer must | |
728 point at the offset from which you wish to upload. | |
729 i.e., if uploading the full file, it should point at the | |
730 start of the file. Normally when a file is opened for | |
731 reading, the fp will point at the first byte. See the | |
732 size parameter below for more info. | |
733 | |
734 :type headers: dict | |
735 :param headers: The headers to pass along with the PUT request | |
736 | |
737 :type num_cb: int | |
738 :param num_cb: (optional) If a callback is specified with the | |
739 cb parameter this parameter determines the granularity of | |
740 the callback by defining the maximum number of times the | |
741 callback will be called during the file | |
742 transfer. Providing a negative integer will cause your | |
743 callback to be called with each buffer read. | |
744 | |
745 :type query_args: string | |
746 :param query_args: (optional) Arguments to pass in the query string. | |
747 | |
748 :type chunked_transfer: boolean | |
749 :param chunked_transfer: (optional) If true, we use chunked | |
750 Transfer-Encoding. | |
751 | |
752 :type size: int | |
753 :param size: (optional) The maximum number of bytes to read | |
754 from the file pointer (fp). This is useful when uploading | |
755 a file in multiple parts where you are splitting the file | |
756 up into different ranges to be uploaded. If not specified, | |
757 the default behaviour is to read all bytes from the file | |
758 pointer. Fewer bytes may be available. | |
759 """ | |
760 self._send_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb, | |
761 query_args=query_args, | |
762 chunked_transfer=chunked_transfer, size=size) | |
763 | |
764 def _send_file_internal(self, fp, headers=None, cb=None, num_cb=10, | |
765 query_args=None, chunked_transfer=False, size=None, | |
766 hash_algs=None): | |
767 provider = self.bucket.connection.provider | |
768 try: | |
769 spos = fp.tell() | |
770 except IOError: | |
771 spos = None | |
772 self.read_from_stream = False | |
773 | |
774 # If hash_algs is unset and the MD5 hasn't already been computed, | |
775 # default to an MD5 hash_alg to hash the data on-the-fly. | |
776 if hash_algs is None and not self.md5: | |
777 hash_algs = {'md5': md5} | |
778 digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {}) | |
779 | |
780 def sender(http_conn, method, path, data, headers): | |
781 # This function is called repeatedly for temporary retries | |
782 # so we must be sure the file pointer is pointing at the | |
783 # start of the data. | |
784 if spos is not None and spos != fp.tell(): | |
785 fp.seek(spos) | |
786 elif spos is None and self.read_from_stream: | |
787 # if seek is not supported, and we've read from this | |
788 # stream already, then we need to abort retries to | |
789 # avoid setting bad data. | |
790 raise provider.storage_data_error( | |
791 'Cannot retry failed request. fp does not support seeking.') | |
792 | |
793 # If the caller explicitly specified host header, tell putrequest | |
794 # not to add a second host header. Similarly for accept-encoding. | |
795 skips = {} | |
796 if boto.utils.find_matching_headers('host', headers): | |
797 skips['skip_host'] = 1 | |
798 if boto.utils.find_matching_headers('accept-encoding', headers): | |
799 skips['skip_accept_encoding'] = 1 | |
800 http_conn.putrequest(method, path, **skips) | |
801 for key in headers: | |
802 http_conn.putheader(key, headers[key]) | |
803 http_conn.endheaders() | |
804 | |
805 save_debug = self.bucket.connection.debug | |
806 self.bucket.connection.debug = 0 | |
807 # If the debuglevel < 4 we don't want to show connection | |
808 # payload, so turn off HTTP connection-level debug output (to | |
809 # be restored below). | |
810 # Use the getattr approach to allow this to work in AppEngine. | |
811 if getattr(http_conn, 'debuglevel', 0) < 4: | |
812 http_conn.set_debuglevel(0) | |
813 | |
814 data_len = 0 | |
815 if cb: | |
816 if size: | |
817 cb_size = size | |
818 elif self.size: | |
819 cb_size = self.size | |
820 else: | |
821 cb_size = 0 | |
822 if chunked_transfer and cb_size == 0: | |
823 # For chunked Transfer, we call the cb for every 1MB | |
824 # of data transferred, except when we know size. | |
825 cb_count = (1024 * 1024) / self.BufferSize | |
826 elif num_cb > 1: | |
827 cb_count = int( | |
828 math.ceil(cb_size / self.BufferSize / (num_cb - 1.0))) | |
829 elif num_cb < 0: | |
830 cb_count = -1 | |
831 else: | |
832 cb_count = 0 | |
833 i = 0 | |
834 cb(data_len, cb_size) | |
835 | |
836 bytes_togo = size | |
837 if bytes_togo and bytes_togo < self.BufferSize: | |
838 chunk = fp.read(bytes_togo) | |
839 else: | |
840 chunk = fp.read(self.BufferSize) | |
841 | |
842 if not isinstance(chunk, bytes): | |
843 chunk = chunk.encode('utf-8') | |
844 | |
845 if spos is None: | |
846 # read at least something from a non-seekable fp. | |
847 self.read_from_stream = True | |
848 while chunk: | |
849 chunk_len = len(chunk) | |
850 data_len += chunk_len | |
851 if chunked_transfer: | |
852 http_conn.send('%x;\r\n' % chunk_len) | |
853 http_conn.send(chunk) | |
854 http_conn.send('\r\n') | |
855 else: | |
856 http_conn.send(chunk) | |
857 for alg in digesters: | |
858 digesters[alg].update(chunk) | |
859 if bytes_togo: | |
860 bytes_togo -= chunk_len | |
861 if bytes_togo <= 0: | |
862 break | |
863 if cb: | |
864 i += 1 | |
865 if i == cb_count or cb_count == -1: | |
866 cb(data_len, cb_size) | |
867 i = 0 | |
868 if bytes_togo and bytes_togo < self.BufferSize: | |
869 chunk = fp.read(bytes_togo) | |
870 else: | |
871 chunk = fp.read(self.BufferSize) | |
872 | |
873 if not isinstance(chunk, bytes): | |
874 chunk = chunk.encode('utf-8') | |
875 | |
876 self.size = data_len | |
877 | |
878 for alg in digesters: | |
879 self.local_hashes[alg] = digesters[alg].digest() | |
880 | |
881 if chunked_transfer: | |
882 http_conn.send('0\r\n') | |
883 # http_conn.send("Content-MD5: %s\r\n" % self.base64md5) | |
884 http_conn.send('\r\n') | |
885 | |
886 if cb and (cb_count <= 1 or i > 0) and data_len > 0: | |
887 cb(data_len, cb_size) | |
888 | |
889 http_conn.set_debuglevel(save_debug) | |
890 self.bucket.connection.debug = save_debug | |
891 response = http_conn.getresponse() | |
892 body = response.read() | |
893 | |
894 if not self.should_retry(response, chunked_transfer): | |
895 raise provider.storage_response_error( | |
896 response.status, response.reason, body) | |
897 | |
898 return response | |
899 | |
900 if not headers: | |
901 headers = {} | |
902 else: | |
903 headers = headers.copy() | |
904 # Overwrite user-supplied user-agent. | |
905 for header in find_matching_headers('User-Agent', headers): | |
906 del headers[header] | |
907 headers['User-Agent'] = UserAgent | |
908 # If storage_class is None, then a user has not explicitly requested | |
909 # a storage class, so we can assume STANDARD here | |
910 if self._storage_class not in [None, 'STANDARD']: | |
911 headers[provider.storage_class_header] = self.storage_class | |
912 if find_matching_headers('Content-Encoding', headers): | |
913 self.content_encoding = merge_headers_by_name( | |
914 'Content-Encoding', headers) | |
915 if find_matching_headers('Content-Language', headers): | |
916 self.content_language = merge_headers_by_name( | |
917 'Content-Language', headers) | |
918 content_type_headers = find_matching_headers('Content-Type', headers) | |
919 if content_type_headers: | |
920 # Some use cases need to suppress sending of the Content-Type | |
921 # header and depend on the receiving server to set the content | |
922 # type. This can be achieved by setting headers['Content-Type'] | |
923 # to None when calling this method. | |
924 if (len(content_type_headers) == 1 and | |
925 headers[content_type_headers[0]] is None): | |
926 # Delete null Content-Type value to skip sending that header. | |
927 del headers[content_type_headers[0]] | |
928 else: | |
929 self.content_type = merge_headers_by_name( | |
930 'Content-Type', headers) | |
931 elif self.path: | |
932 self.content_type = mimetypes.guess_type(self.path)[0] | |
933 if self.content_type is None: | |
934 self.content_type = self.DefaultContentType | |
935 headers['Content-Type'] = self.content_type | |
936 else: | |
937 headers['Content-Type'] = self.content_type | |
938 if self.base64md5: | |
939 headers['Content-MD5'] = self.base64md5 | |
940 if chunked_transfer: | |
941 headers['Transfer-Encoding'] = 'chunked' | |
942 #if not self.base64md5: | |
943 # headers['Trailer'] = "Content-MD5" | |
944 else: | |
945 headers['Content-Length'] = str(self.size) | |
946 # This is terrible. We need a SHA256 of the body for SigV4, but to do | |
947 # the chunked ``sender`` behavior above, the ``fp`` isn't available to | |
948 the auth mechanism (because closures). Detect if it's SigV4 & embellish | |
949 # while we can before the auth calculations occur. | |
950 if 'hmac-v4-s3' in self.bucket.connection._required_auth_capability(): | |
951 kwargs = {'fp': fp, 'hash_algorithm': hashlib.sha256} | |
952 if size is not None: | |
953 kwargs['size'] = size | |
954 headers['_sha256'] = compute_hash(**kwargs)[0] | |
955 headers['Expect'] = '100-Continue' | |
956 headers = boto.utils.merge_meta(headers, self.metadata, provider) | |
957 resp = self.bucket.connection.make_request( | |
958 'PUT', | |
959 self.bucket.name, | |
960 self.name, | |
961 headers, | |
962 sender=sender, | |
963 query_args=query_args | |
964 ) | |
965 self.handle_version_headers(resp, force=True) | |
966 self.handle_addl_headers(resp.getheaders()) | |
967 | |
968 def should_retry(self, response, chunked_transfer=False): | |
969 provider = self.bucket.connection.provider | |
970 | |
971 if not chunked_transfer: | |
972 if response.status in [500, 503]: | |
973 # 500 & 503 can be plain retries. | |
974 return True | |
975 | |
976 if response.getheader('location'): | |
977 # If there's a redirect, plain retry. | |
978 return True | |
979 | |
980 if 200 <= response.status <= 299: | |
981 self.etag = response.getheader('etag') | |
982 md5 = self.md5 | |
983 if isinstance(md5, bytes): | |
984 md5 = md5.decode('utf-8') | |
985 | |
986 # If you use customer-provided encryption keys, the ETag value that | |
987 # Amazon S3 returns in the response will not be the MD5 of the | |
988 # object. | |
989 amz_server_side_encryption_customer_algorithm = response.getheader( | |
990 'x-amz-server-side-encryption-customer-algorithm', None) | |
991 # The same is applicable for KMS-encrypted objects in gs buckets. | |
992 goog_customer_managed_encryption = response.getheader( | |
993 'x-goog-encryption-kms-key-name', None) | |
994 if (amz_server_side_encryption_customer_algorithm is None and | |
995 goog_customer_managed_encryption is None): | |
996 if self.etag != '"%s"' % md5: | |
997 raise provider.storage_data_error( | |
998 'ETag from S3 did not match computed MD5. ' | |
999 '%s vs. %s' % (self.etag, self.md5)) | |
1000 | |
1001 return True | |
1002 | |
1003 if response.status == 400: | |
1004 # The 400 must be trapped so the retry handler can check to | |
1005 # see if it was a timeout. | |
1006 # If ``RequestTimeout`` is present, we'll retry. Otherwise, bomb | |
1007 # out. | |
1008 body = response.read() | |
1009 err = provider.storage_response_error( | |
1010 response.status, | |
1011 response.reason, | |
1012 body | |
1013 ) | |
1014 | |
1015 if err.error_code in ['RequestTimeout']: | |
1016 raise PleaseRetryException( | |
1017 "Saw %s, retrying" % err.error_code, | |
1018 response=response | |
1019 ) | |
1020 | |
1021 return False | |
1022 | |
1023 def compute_md5(self, fp, size=None): | |
1024 """ | |
1025 :type fp: file | |
1026 :param fp: File pointer to the file to MD5 hash. The file | |
1027 pointer will be reset to the same position before the | |
1028 method returns. | |
1029 | |
1030 :type size: int | |
1031 :param size: (optional) The maximum number of bytes to read | |
1032 from the file pointer (fp). This is useful when uploading | |
1033 a file in multiple parts where the file is being split | |
1034 in place into different parts. Fewer bytes may be available. | |
1035 """ | |
1036 hex_digest, b64_digest, data_size = compute_md5(fp, size=size) | |
1037 # Returned values are MD5 hash, base64 encoded MD5 hash, and data size. | |
1038 # The internal implementation of compute_md5() needs to return the | |
1039 # data size but we don't want to return that value to the external | |
1040 # caller because it changes the class interface (i.e. it might | |
1041 # break some code) so we consume the third tuple value here and | |
1042 # return the remainder of the tuple to the caller, thereby preserving | |
1043 # the existing interface. | |
1044 self.size = data_size | |
1045 return (hex_digest, b64_digest) | |
1046 | |
1047 def set_contents_from_stream(self, fp, headers=None, replace=True, | |
1048 cb=None, num_cb=10, policy=None, | |
1049 reduced_redundancy=False, query_args=None, | |
1050 size=None): | |
1051 """ | |
1052 Store an object using the name of the Key object as the key in | |
1053 cloud and the contents of the data stream pointed to by 'fp' as | |
1054 the contents. | |
1055 | |
1056 The stream object is not seekable and total size is not known. | |
1057 This has the implication that we can't specify the | |
1058 Content-Length and Content-MD5 in the header. So for huge | |
1059 uploads, the delay in calculating MD5 is avoided, but at the | |
1060 cost of being unable to verify the integrity of the uploaded | |
1061 data. | |
1062 | |
1063 :type fp: file | |
1064 :param fp: the file whose contents are to be uploaded | |
1065 | |
1066 :type headers: dict | |
1067 :param headers: additional HTTP headers to be sent with the | |
1068 PUT request. | |
1069 | |
1070 :type replace: bool | |
1071 :param replace: If this parameter is False, the method will first check | |
1072 to see if an object exists in the bucket with the same key. If it | |
1073 does, it won't overwrite it. The default value is True which will | |
1074 overwrite the object. | |
1075 | |
1076 :type cb: function | |
1077 :param cb: a callback function that will be called to report | |
1078 progress on the upload. The callback should accept two integer | |
1079 parameters, the first representing the number of bytes that have | |
1080 been successfully transmitted to GS and the second representing the | |
1081 total number of bytes that need to be transmitted. | |
1082 | |
1083 :type num_cb: int | |
1084 :param num_cb: (optional) If a callback is specified with the | |
1085 cb parameter, this parameter determines the granularity of | |
1086 the callback by defining the maximum number of times the | |
1087 callback will be called during the file transfer. | |
1088 | |
1089 :type policy: :class:`boto.gs.acl.CannedACLStrings` | |
1090 :param policy: A canned ACL policy that will be applied to the new key | |
1091 in GS. | |
1092 | |
1093 :type reduced_redundancy: bool | |
1094 :param reduced_redundancy: If True, this will set the storage | |
1095 class of the new Key to be REDUCED_REDUNDANCY. The Reduced | |
1096 Redundancy Storage (RRS) feature of S3 provides lower | |
1097 redundancy at lower storage cost. | |
1098 | |
1099 :type size: int | |
1100 :param size: (optional) The maximum number of bytes to read from | |
1101 the file pointer (fp). This is useful when uploading a | |
1102 file in multiple parts where you are splitting the file up | |
1103 into different ranges to be uploaded. If not specified, | |
1104 the default behaviour is to read all bytes from the file | |
1105 pointer. Fewer bytes may be available. | |
1106 """ | |
1107 | |
1108 provider = self.bucket.connection.provider | |
1109 if not provider.supports_chunked_transfer(): | |
1110 raise BotoClientError('%s does not support chunked transfer' | |
1111 % provider.get_provider_name()) | |
1112 | |
1113 # Name of the Object should be specified explicitly for Streams. | |
1114 if not self.name or self.name == '': | |
1115 raise BotoClientError('Cannot determine the destination ' | |
1116 'object name for the given stream') | |
1117 | |
1118 if headers is None: | |
1119 headers = {} | |
1120 if policy: | |
1121 headers[provider.acl_header] = policy | |
1122 | |
1123 if reduced_redundancy: | |
1124 self.storage_class = 'REDUCED_REDUNDANCY' | |
1125 if provider.storage_class_header: | |
1126 headers[provider.storage_class_header] = self.storage_class | |
1127 | |
1128 if self.bucket is not None: | |
1129 if not replace: | |
1130 if self.bucket.lookup(self.name): | |
1131 return | |
1132 self.send_file(fp, headers, cb, num_cb, query_args, | |
1133 chunked_transfer=True, size=size) | |
1134 | |
1135 def set_contents_from_file(self, fp, headers=None, replace=True, | |
1136 cb=None, num_cb=10, policy=None, md5=None, | |
1137 reduced_redundancy=False, query_args=None, | |
1138 encrypt_key=False, size=None, rewind=False): | |
1139 """ | |
1140 Store an object in S3 using the name of the Key object as the | |
1141 key in S3 and the contents of the file pointed to by 'fp' as the | |
1142 contents. The data is read from 'fp' from its current position until | |
1143 'size' bytes have been read or EOF. | |
1144 | |
1145 :type fp: file | |
1146 :param fp: the file whose contents to upload | |
1147 | |
1148 :type headers: dict | |
1149 :param headers: Additional HTTP headers that will be sent with | |
1150 the PUT request. | |
1151 | |
1152 :type replace: bool | |
1153 :param replace: If this parameter is False, the method will | |
1154 first check to see if an object exists in the bucket with | |
1155 the same key. If it does, it won't overwrite it. The | |
1156 default value is True which will overwrite the object. | |
1157 | |
1158 :type cb: function | |
1159 :param cb: a callback function that will be called to report | |
1160 progress on the upload. The callback should accept two | |
1161 integer parameters, the first representing the number of | |
1162 bytes that have been successfully transmitted to S3 and | |
1163 the second representing the size of the to be transmitted | |
1164 object. | |
1165 | |
1166 :type num_cb: int | |
1167 :param num_cb: (optional) If a callback is specified with the | |
1168 cb parameter this parameter determines the granularity of | |
1169 the callback by defining the maximum number of times the | |
1170 callback will be called during the file transfer. | |
1171 | |
1172 :type policy: :class:`boto.s3.acl.CannedACLStrings` | |
1173 :param policy: A canned ACL policy that will be applied to the | |
1174 new key in S3. | |
1175 | |
1176 :type md5: A tuple containing the hexdigest version of the MD5 | |
1177 checksum of the file as the first element and the | |
1178 Base64-encoded version of the plain checksum as the second | |
1179 element. This is the same format returned by the | |
1180 compute_md5 method. | |
1181 :param md5: If you need to compute the MD5 for any reason | |
1182 prior to upload, it's silly to have to do it twice so this | |
1183 param, if present, will be used as the MD5 values of the | |
1184 file. Otherwise, the checksum will be computed. | |
1185 | |
1186 :type reduced_redundancy: bool | |
1187 :param reduced_redundancy: If True, this will set the storage | |
1188 class of the new Key to be REDUCED_REDUNDANCY. The Reduced | |
1189 Redundancy Storage (RRS) feature of S3 provides lower | |
1190 redundancy at lower storage cost. | |
1191 | |
1192 :type encrypt_key: bool | |
1193 :param encrypt_key: If True, the new copy of the object will | |
1194 be encrypted on the server-side by S3 and will be stored | |
1195 in an encrypted form while at rest in S3. | |
1196 | |
1197 :type size: int | |
1198 :param size: (optional) The maximum number of bytes to read | |
1199 from the file pointer (fp). This is useful when uploading | |
1200 a file in multiple parts where you are splitting the file | |
1201 up into different ranges to be uploaded. If not specified, | |
1202 the default behaviour is to read all bytes from the file | |
1203 pointer. Fewer bytes may be available. | |
1204 | |
1205 :type rewind: bool | |
1206 :param rewind: (optional) If True, the file pointer (fp) will | |
1207 be rewound to the start before any bytes are read from | |
1208 it. The default behaviour is False which reads from the | |
1209 current position of the file pointer (fp). | |
1210 | |
1211 :rtype: int | |
1212 :return: The number of bytes written to the key. | |
1213 """ | |
1214 provider = self.bucket.connection.provider | |
1215 headers = headers or {} | |
1216 if policy: | |
1217 headers[provider.acl_header] = policy | |
1218 if encrypt_key: | |
1219 headers[provider.server_side_encryption_header] = 'AES256' | |
1220 | |
1221 if rewind: | |
1222 # caller requests reading from beginning of fp. | |
1223 fp.seek(0, os.SEEK_SET) | |
1224 else: | |
1225 # The following seek/tell/seek logic is intended | |
1226 # to detect applications using the older interface to | |
1227 # set_contents_from_file(), which automatically rewound the | |
1228 # file each time the Key was reused. This changed with commit | |
1229 # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads | |
1230 # split into multiple parts and uploaded in parallel, and at | |
1231 # the time of that commit this check was added because otherwise | |
1232 # older programs would get a success status and upload an empty | |
1233 # object. Unfortunately, it's very inefficient for fp's implemented | |
1234 # by KeyFile (used, for example, by gsutil when copying between | |
1235 # providers). So, we skip the check for the KeyFile case. | |
1236 # TODO: At some point consider removing this seek/tell/seek | |
1237 # logic, after enough time has passed that it's unlikely any | |
1238 # programs remain that assume the older auto-rewind interface. | |
1239 if not isinstance(fp, KeyFile): | |
1240 spos = fp.tell() | |
1241 fp.seek(0, os.SEEK_END) | |
1242 if fp.tell() == spos: | |
1243 fp.seek(0, os.SEEK_SET) | |
1244 if fp.tell() != spos: | |
1245 # Raise an exception as this is likely a programming | |
1246 # error whereby there is data before the fp but nothing | |
1247 # after it. | |
1248 fp.seek(spos) | |
1249 raise AttributeError('fp is at EOF. Use rewind option ' | |
1250 'or seek() to data start.') | |
1251 # seek back to the correct position. | |
1252 fp.seek(spos) | |
1253 | |
1254 if reduced_redundancy: | |
1255 self.storage_class = 'REDUCED_REDUNDANCY' | |
1256 if provider.storage_class_header: | |
1257 headers[provider.storage_class_header] = self.storage_class | |
1258 # TODO - What if provider doesn't support reduced redundancy? | |
1259 # What if different providers provide different classes? | |
1260 if hasattr(fp, 'name'): | |
1261 self.path = fp.name | |
1262 if self.bucket is not None: | |
1263 if not md5 and provider.supports_chunked_transfer(): | |
1264 # defer md5 calculation to be done on the fly, since | |
1265 # we don't know anything about the size yet. | |
1266 chunked_transfer = True | |
1267 self.size = None | |
1268 else: | |
1269 chunked_transfer = False | |
1270 if isinstance(fp, KeyFile): | |
1271 # Avoid EOF seek for KeyFile case as it's very inefficient. | |
1272 key = fp.getkey() | |
1273 size = key.size - fp.tell() | |
1274 self.size = size | |
1275 # At present both GCS and S3 use MD5 for the etag for | |
1276 # non-multipart-uploaded objects. If the etag is 32 hex | |
1277 # chars use it as an MD5, to avoid having to read the file | |
1278 # twice while transferring. | |
1279 if (re.match('^"[a-fA-F0-9]{32}"$', key.etag)): | |
1280 etag = key.etag.strip('"') | |
1281 md5 = (etag, base64.b64encode(binascii.unhexlify(etag))) | |
1282 if not md5: | |
1283 # compute_md5() also sets self.size to the actual | |
1284 # number of bytes read while computing the md5. | |
1285 md5 = self.compute_md5(fp, size) | |
1286 # adjust size if required | |
1287 size = self.size | |
1288 elif size: | |
1289 self.size = size | |
1290 else: | |
1291 # If md5 is provided, we still need the size, so | |
1292 # calculate it based on the bytes to the end of the content. | |
1293 spos = fp.tell() | |
1294 fp.seek(0, os.SEEK_END) | |
1295 self.size = fp.tell() - spos | |
1296 fp.seek(spos) | |
1297 size = self.size | |
1298 self.md5 = md5[0] | |
1299 self.base64md5 = md5[1] | |
1300 | |
1301 if self.name is None: | |
1302 self.name = self.md5 | |
1303 if not replace: | |
1304 if self.bucket.lookup(self.name): | |
1305 return | |
1306 | |
1307 self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb, | |
1308 query_args=query_args, | |
1309 chunked_transfer=chunked_transfer, size=size) | |
1310 # return number of bytes written. | |
1311 return self.size | |
1312 | |
1313 def set_contents_from_filename(self, filename, headers=None, replace=True, | |
1314 cb=None, num_cb=10, policy=None, md5=None, | |
1315 reduced_redundancy=False, | |
1316 encrypt_key=False): | |
1317 """ | |
1318 Store an object in S3 using the name of the Key object as the | |
1319 key in S3 and the contents of the file named by 'filename'. | |
1320 See set_contents_from_file method for details about the | |
1321 parameters. | |
1322 | |
1323 :type filename: string | |
1324 :param filename: The name of the file that you want to put onto S3 | |
1325 | |
1326 :type headers: dict | |
1327 :param headers: Additional headers to pass along with the | |
1328 request to AWS. | |
1329 | |
1330 :type replace: bool | |
1331 :param replace: If True, replaces the contents of the key | |
1332 if it already exists. | |
1333 | |
1334 :type cb: function | |
1335 :param cb: a callback function that will be called to report | |
1336 progress on the upload. The callback should accept two | |
1337 integer parameters, the first representing the number of | |
1338 bytes that have been successfully transmitted to S3 and | |
1339 the second representing the size of the to be transmitted | |
1340 object. | |
1341 | |
1342 :type num_cb: int | |
1343 :param num_cb: (optional) If a callback is specified with the | |
1344 cb parameter this parameter determines the granularity of | |
1345 the callback by defining the maximum number of times the | |
1346 callback will be called during the file transfer. | |
1347 | |
1348 :type policy: :class:`boto.s3.acl.CannedACLStrings` | |
1349 :param policy: A canned ACL policy that will be applied to the | |
1350 new key in S3. | |
1351 | |
1352 :type md5: A tuple containing the hexdigest version of the MD5 | |
1353 checksum of the file as the first element and the | |
1354 Base64-encoded version of the plain checksum as the second | |
1355 element. This is the same format returned by the | |
1356 compute_md5 method. | |
1357 :param md5: If you need to compute the MD5 for any reason | |
1358 prior to upload, it's silly to have to do it twice so this | |
1359 param, if present, will be used as the MD5 values of the | |
1360 file. Otherwise, the checksum will be computed. | |
1361 | |
1362 :type reduced_redundancy: bool | |
1363 :param reduced_redundancy: If True, this will set the storage | |
1364 class of the new Key to be REDUCED_REDUNDANCY. The Reduced | |
1365 Redundancy Storage (RRS) feature of S3 provides lower | |
1366 redundancy at lower storage cost. | |
 | |
:type encrypt_key: bool | |
1367 :param encrypt_key: If True, the new copy of the object | |
1368 will be encrypted on the server-side by S3 and will be | |
1369 stored in an encrypted form while at rest in S3. | |
1370 | |
1371 :rtype: int | |
1372 :return: The number of bytes written to the key. | |
1373 """ | |
1374 with open(filename, 'rb') as fp: | |
1375 return self.set_contents_from_file(fp, headers, replace, cb, | |
1376 num_cb, policy, md5, | |
1377 reduced_redundancy, | |
1378 encrypt_key=encrypt_key) | |
1379 | |
1380 def set_contents_from_string(self, string_data, headers=None, replace=True, | |
1381 cb=None, num_cb=10, policy=None, md5=None, | |
1382 reduced_redundancy=False, | |
1383 encrypt_key=False): | |
1384 """ | |
1385 Store an object in S3 using the name of the Key object as the | |
1386 key in S3 and the string 'string_data' as the contents. | |
1387 See set_contents_from_file method for details about the | |
1388 parameters. | |
1389 | |
1390 :type headers: dict | |
1391 :param headers: Additional headers to pass along with the | |
1392 request to AWS. | |
1393 | |
1394 :type replace: bool | |
1395 :param replace: If True, replaces the contents of the key in | |
1396 S3 if it already exists. | |
1397 | |
1398 :type cb: function | |
1399 :param cb: a callback function that will be called to report | |
1400 progress on the upload. The callback should accept two | |
1401 integer parameters, the first representing the number of | |
1402 bytes that have been successfully transmitted to S3 and | |
1403 the second representing the total size of the object to | |
1404 be transmitted. | |
1405 | |
1406 :type num_cb: int | |
1407 :param num_cb: (optional) If a callback is specified with the | |
1408 cb parameter, this parameter determines the granularity of | |
1409 the callback by defining the maximum number of times the | |
1410 callback will be called during the file transfer. | |
1411 | |
1412 :type policy: :class:`boto.s3.acl.CannedACLStrings` | |
1413 :param policy: A canned ACL policy that will be applied to the | |
1414 new key in S3. | |
1415 | |
1416 :type md5: A tuple containing the hexdigest version of the MD5 | |
1417 checksum of the file as the first element and the | |
1418 Base64-encoded version of the plain checksum as the second | |
1419 element. This is the same format returned by the | |
1420 compute_md5 method. | |
1421 :param md5: If the MD5 checksum has already been computed | |
1422 (for example, by an earlier call to compute_md5), it can be | |
1423 passed here to avoid computing it a second time. Otherwise, | |
1424 the checksum will be computed. | |
1425 | |
1426 :type reduced_redundancy: bool | |
1427 :param reduced_redundancy: If True, this will set the storage | |
1428 class of the new Key to be REDUCED_REDUNDANCY. The Reduced | |
1429 Redundancy Storage (RRS) feature of S3 provides lower | |
1430 redundancy at lower storage cost. | |
1431 | |
1432 :type encrypt_key: bool | |
1433 :param encrypt_key: If True, the new copy of the object will | |
1434 be encrypted on the server-side by S3 and will be stored | |
1435 in an encrypted form while at rest in S3. | |
1436 """ | |
1437 if not isinstance(string_data, bytes): | |
1438 string_data = string_data.encode("utf-8") | |
1439 fp = BytesIO(string_data) | |
1440 r = self.set_contents_from_file(fp, headers, replace, cb, num_cb, | |
1441 policy, md5, reduced_redundancy, | |
1442 encrypt_key=encrypt_key) | |
1443 fp.close() | |
1444 return r | |
1445 | |
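# Editor's note -- illustrative sketch (hypothetical key name): both str and
# bytes payloads are accepted; str values are encoded as UTF-8 before upload.
#
#   key = bucket.new_key('notes/readme.txt')
#   key.set_contents_from_string(u'hello world',
#                                headers={'Content-Type': 'text/plain'},
#                                policy='public-read')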
1446 def get_file(self, fp, headers=None, cb=None, num_cb=10, | |
1447 torrent=False, version_id=None, override_num_retries=None, | |
1448 response_headers=None): | |
1449 """ | |
1450 Retrieves a file from an S3 Key | |
1451 | |
1452 :type fp: file | |
1453 :param fp: File pointer to put the data into | |
1454 | |
1455 :type headers: dict | |
1456 :param headers: headers to send when retrieving the file | |
1457 | |
1458 :type cb: function | |
1459 :param cb: a callback function that will be called to report | |
1460 progress on the download. The callback should accept two | |
1461 integer parameters, the first representing the number of | |
1462 bytes that have been successfully received from S3 and | |
1463 the second representing the total size of the object | |
1464 being retrieved. | |
1465 | |
1466 :type num_cb: int | |
1467 :param num_cb: (optional) If a callback is specified with the | |
1468 cb parameter this parameter determines the granularity of | |
1469 the callback by defining the maximum number of times the | |
1470 callback will be called during the file transfer. | |
1471 | |
1472 :type torrent: bool | |
1473 :param torrent: Flag for whether to get a torrent for the file | |
1474 | |
1475 :type override_num_retries: int | |
1476 :param override_num_retries: If not None will override configured | |
1477 num_retries parameter for underlying GET. | |
1478 | |
1479 :type response_headers: dict | |
1480 :param response_headers: A dictionary containing HTTP | |
1481 headers/values that will override any headers associated | |
1482 with the stored object in the response. See | |
1483 http://goo.gl/EWOPb for details. | |
1484 | |
1485 :type version_id: str | |
1486 :param version_id: The ID of a particular version of the object. | |
1487 If this parameter is not supplied but the Key object has | |
1488 a ``version_id`` attribute, that value will be used when | |
1489 retrieving the object. You can set the Key object's | |
1490 ``version_id`` attribute to None to always grab the latest | |
1491 version from a version-enabled bucket. | |
1492 """ | |
1493 self._get_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb, | |
1494 torrent=torrent, version_id=version_id, | |
1495 override_num_retries=override_num_retries, | |
1496 response_headers=response_headers, | |
1497 hash_algs=None, | |
1498 query_args=None) | |
1499 | |
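# Editor's note -- illustrative sketch: stream an object into an already-open
# file object, overriding the returned Content-Type via response_headers
# (names below are hypothetical):
#
#   key = bucket.get_key('backups/data.bin')
#   with open('/tmp/data.bin', 'wb') as fp:
#       key.get_file(fp, response_headers={
#           'response-content-type': 'application/octet-stream'})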
1500 def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10, | |
1501 torrent=False, version_id=None, override_num_retries=None, | |
1502 response_headers=None, hash_algs=None, query_args=None): | |
1503 if headers is None: | |
1504 headers = {} | |
1505 save_debug = self.bucket.connection.debug | |
1506 if self.bucket.connection.debug == 1: | |
1507 self.bucket.connection.debug = 0 | |
1508 | |
1509 query_args = query_args or [] | |
1510 if torrent: | |
1511 query_args.append('torrent') | |
1512 | |
1513 if hash_algs is None and not torrent: | |
1514 hash_algs = {'md5': md5} | |
1515 digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {}) | |
1516 | |
1517 # If a version_id is passed in, use that. If not, check to see | |
1518 # if the Key object has an explicit version_id and, if so, use that. | |
1519 # Otherwise, don't pass a version_id query param. | |
1520 if version_id is None: | |
1521 version_id = self.version_id | |
1522 if version_id: | |
1523 query_args.append('versionId=%s' % version_id) | |
1524 if response_headers: | |
1525 for key in response_headers: | |
1526 query_args.append('%s=%s' % ( | |
1527 key, urllib.parse.quote(response_headers[key]))) | |
1528 query_args = '&'.join(query_args) | |
1529 self.open('r', headers, query_args=query_args, | |
1530 override_num_retries=override_num_retries) | |
1531 | |
1532 data_len = 0 | |
1533 if cb: | |
1534 if self.size is None: | |
1535 cb_size = 0 | |
1536 else: | |
1537 cb_size = self.size | |
1538 if self.size is None and num_cb != -1: | |
1539 # If size is not available due to chunked transfer for example, | |
1540 # we'll call the cb for every 1MB of data transferred. | |
1541 cb_count = (1024 * 1024) // self.BufferSize | |
1542 elif num_cb > 1: | |
1543 cb_count = int(math.ceil(cb_size/self.BufferSize/(num_cb-1.0))) | |
1544 elif num_cb < 0: | |
1545 cb_count = -1 | |
1546 else: | |
1547 cb_count = 0 | |
1548 i = 0 | |
1549 cb(data_len, cb_size) | |
1550 try: | |
1551 for chunk in self: | |
1552 fp.write(chunk) | |
1553 data_len += len(chunk) | |
1554 for alg in digesters: | |
1555 digesters[alg].update(chunk) | |
1556 if cb: | |
1557 if cb_size > 0 and data_len >= cb_size: | |
1558 break | |
1559 i += 1 | |
1560 if i == cb_count or cb_count == -1: | |
1561 cb(data_len, cb_size) | |
1562 i = 0 | |
1563 except IOError as e: | |
1564 if e.errno == errno.ENOSPC: | |
1565 raise StorageDataError('Out of space for destination file ' | |
1566 '%s' % fp.name) | |
1567 raise | |
1568 if cb and (cb_count <= 1 or i > 0) and data_len > 0: | |
1569 cb(data_len, cb_size) | |
1570 for alg in digesters: | |
1571 self.local_hashes[alg] = digesters[alg].digest() | |
1572 if self.size is None and not torrent and "Range" not in headers: | |
1573 self.size = data_len | |
1574 self.close() | |
1575 self.bucket.connection.debug = save_debug | |
1576 | |
1577 def get_torrent_file(self, fp, headers=None, cb=None, num_cb=10): | |
1578 """ | |
1579 Get a torrent file (see get_file) | |
1580 | |
1581 :type fp: file | |
1582 :param fp: The file pointer of where to put the torrent | |
1583 | |
1584 :type headers: dict | |
1585 :param headers: Headers to be passed | |
1586 | |
1587 :type cb: function | |
1588 :param cb: a callback function that will be called to report | |
1589 progress on the download. The callback should accept two | |
1590 integer parameters, the first representing the number of | |
1591 bytes that have been successfully received from S3 and | |
1592 the second representing the total size of the object | |
1593 being retrieved. | |
1594 | |
1595 :type num_cb: int | |
1596 :param num_cb: (optional) If a callback is specified with the | |
1597 cb parameter this parameter determines the granularity of | |
1598 the callback by defining the maximum number of times the | |
1599 callback will be called during the file transfer. | |
1600 | |
1601 """ | |
1602 return self.get_file(fp, headers, cb, num_cb, torrent=True) | |
1603 | |
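# Editor's note -- illustrative sketch: save the object's torrent metadata
# rather than the object itself (hypothetical file name):
#
#   with open('/tmp/data.bin.torrent', 'wb') as fp:
#       key.get_torrent_file(fp)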
1604 def get_contents_to_file(self, fp, headers=None, | |
1605 cb=None, num_cb=10, | |
1606 torrent=False, | |
1607 version_id=None, | |
1608 res_download_handler=None, | |
1609 response_headers=None): | |
1610 """ | |
1611 Retrieve an object from S3 using the name of the Key object as the | |
1612 key in S3. Write the contents of the object to the file pointed | |
1613 to by 'fp'. | |
1614 | |
1615 :type fp: file-like object | |
1616 :param fp: The file object to write the object's contents to. | |
1617 | |
1618 :type headers: dict | |
1619 :param headers: additional HTTP headers that will be sent with | |
1620 the GET request. | |
1621 | |
1622 :type cb: function | |
1623 :param cb: a callback function that will be called to report | |
1624 progress on the download. The callback should accept two | |
1625 integer parameters, the first representing the number of | |
1626 bytes that have been successfully received from S3 and | |
1627 the second representing the total size of the object | |
1628 being retrieved. | |
1629 | |
1630 :type num_cb: int | |
1631 :param num_cb: (optional) If a callback is specified with the | |
1632 cb parameter this parameter determines the granularity of | |
1633 the callback by defining the maximum number of times the | |
1634 callback will be called during the file transfer. | |
1635 | |
1636 :type torrent: bool | |
1637 :param torrent: If True, downloads the torrent file for the | |
1638 object instead of the object's contents. | |
1639 | |
1640 :type res_download_handler: ResumableDownloadHandler | |
1641 :param res_download_handler: If provided, this handler will | |
1642 perform the download. | |
1643 | |
1644 :type response_headers: dict | |
1645 :param response_headers: A dictionary containing HTTP | |
1646 headers/values that will override any headers associated | |
1647 with the stored object in the response. See | |
1648 http://goo.gl/EWOPb for details. | |
1649 | |
1650 :type version_id: str | |
1651 :param version_id: The ID of a particular version of the object. | |
1652 If this parameter is not supplied but the Key object has | |
1653 a ``version_id`` attribute, that value will be used when | |
1654 retrieving the object. You can set the Key object's | |
1655 ``version_id`` attribute to None to always grab the latest | |
1656 version from a version-enabled bucket. | |
1657 """ | |
1658 if self.bucket is not None: | |
1659 if res_download_handler: | |
1660 res_download_handler.get_file(self, fp, headers, cb, num_cb, | |
1661 torrent=torrent, | |
1662 version_id=version_id) | |
1663 else: | |
1664 self.get_file(fp, headers, cb, num_cb, torrent=torrent, | |
1665 version_id=version_id, | |
1666 response_headers=response_headers) | |
1667 | |
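# Editor's note -- illustrative sketch: download a specific version of an
# object into an open file handle (the version id below is a placeholder):
#
#   with open('/tmp/data.v1.bin', 'wb') as fp:
#       key.get_contents_to_file(fp, version_id='EXAMPLE-VERSION-ID')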
1668 def get_contents_to_filename(self, filename, headers=None, | |
1669 cb=None, num_cb=10, | |
1670 torrent=False, | |
1671 version_id=None, | |
1672 res_download_handler=None, | |
1673 response_headers=None): | |
1674 """ | |
1675 Retrieve an object from S3 using the name of the Key object as the | |
1676 key in S3. Store contents of the object to a file named by 'filename'. | |
1677 See get_contents_to_file method for details about the | |
1678 parameters. | |
1679 | |
1680 :type filename: string | |
1681 :param filename: The filename of where to put the file contents | |
1682 | |
1683 :type headers: dict | |
1684 :param headers: Any additional headers to send in the request | |
1685 | |
1686 :type cb: function | |
1687 :param cb: a callback function that will be called to report | |
1688 progress on the download. The callback should accept two | |
1689 integer parameters, the first representing the number of | |
1690 bytes that have been successfully received from S3 and | |
1691 the second representing the total size of the object | |
1692 being retrieved. | |
1693 | |
1694 :type num_cb: int | |
1695 :param num_cb: (optional) If a callback is specified with the | |
1696 cb parameter this parameter determines the granularity of | |
1697 the callback by defining the maximum number of times the | |
1698 callback will be called during the file transfer. | |
1699 | |
1700 :type torrent: bool | |
1701 :param torrent: If True, downloads the torrent file for the | |
1702 object instead of the object's contents. | |
1703 | |
1704 :type res_download_handler: ResumableDownloadHandler | |
1705 :param res_download_handler: If provided, this handler will | |
1706 perform the download. | |
1707 | |
1708 :type response_headers: dict | |
1709 :param response_headers: A dictionary containing HTTP | |
1710 headers/values that will override any headers associated | |
1711 with the stored object in the response. See | |
1712 http://goo.gl/EWOPb for details. | |
1713 | |
1714 :type version_id: str | |
1715 :param version_id: The ID of a particular version of the object. | |
1716 If this parameter is not supplied but the Key object has | |
1717 a ``version_id`` attribute, that value will be used when | |
1718 retrieving the object. You can set the Key object's | |
1719 ``version_id`` attribute to None to always grab the latest | |
1720 version from a version-enabled bucket. | |
1721 """ | |
1722 try: | |
1723 with open(filename, 'wb') as fp: | |
1724 self.get_contents_to_file(fp, headers, cb, num_cb, | |
1725 torrent=torrent, | |
1726 version_id=version_id, | |
1727 res_download_handler=res_download_handler, | |
1728 response_headers=response_headers) | |
1729 except Exception: | |
1730 os.remove(filename) | |
1731 raise | |
1732 # if last_modified date was sent from s3, try to set file's timestamp | |
1733 if self.last_modified is not None: | |
1734 try: | |
1735 modified_tuple = email.utils.parsedate_tz(self.last_modified) | |
1736 modified_stamp = int(email.utils.mktime_tz(modified_tuple)) | |
1737 os.utime(fp.name, (modified_stamp, modified_stamp)) | |
1738 except Exception: | |
1739 pass | |
1740 | |
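# Editor's note -- illustrative sketch (hypothetical path): download straight
# to a local path; on success the local file's mtime is set from the key's
# last_modified value, and on failure the partially written file is removed.
#
#   key.get_contents_to_filename('/tmp/data.bin')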
1741 def get_contents_as_string(self, headers=None, | |
1742 cb=None, num_cb=10, | |
1743 torrent=False, | |
1744 version_id=None, | |
1745 response_headers=None, encoding=None): | |
1746 """ | |
1747 Retrieve an object from S3 using the name of the Key object as the | |
1748 key in S3. Return the contents of the object as a string. | |
1749 See get_contents_to_file method for details about the | |
1750 parameters. | |
1751 | |
1752 :type headers: dict | |
1753 :param headers: Any additional headers to send in the request | |
1754 | |
1755 :type cb: function | |
1756 :param cb: a callback function that will be called to report | |
1757 progress on the download. The callback should accept two | |
1758 integer parameters, the first representing the number of | |
1759 bytes that have been successfully received from S3 and | |
1760 the second representing the total size of the object | |
1761 being retrieved. | |
1762 | |
1763 :type num_cb: int | |
1764 :param num_cb: (optional) If a callback is specified with the | |
1765 cb parameter this parameter determines the granularity of | |
1766 the callback by defining the maximum number of times the | |
1767 callback will be called during the file transfer. | |
1768 | |
1769 :type torrent: bool | |
1770 :param torrent: If True, returns the contents of a torrent file | |
1771 as a string. | |
1772 | |
1773 :type response_headers: dict | |
1774 :param response_headers: A dictionary containing HTTP | |
1775 headers/values that will override any headers associated | |
1776 with the stored object in the response. See | |
1777 http://goo.gl/EWOPb for details. | |
1778 | |
1779 :type version_id: str | |
1780 :param version_id: The ID of a particular version of the object. | |
1781 If this parameter is not supplied but the Key object has | |
1782 a ``version_id`` attribute, that value will be used when | |
1783 retrieving the object. You can set the Key object's | |
1784 ``version_id`` attribute to None to always grab the latest | |
1785 version from a version-enabled bucket. | |
1786 | |
1787 :type encoding: str | |
1788 :param encoding: The text encoding to use, such as ``utf-8`` | |
1789 or ``iso-8859-1``. If set, then a string will be returned. | |
1790 Defaults to ``None`` and returns bytes. | |
1791 | |
1792 :rtype: bytes or str | |
1793 :returns: The contents of the file as bytes or a string | |
1794 """ | |
1795 fp = BytesIO() | |
1796 self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent, | |
1797 version_id=version_id, | |
1798 response_headers=response_headers) | |
1799 value = fp.getvalue() | |
1800 | |
1801 if encoding is not None: | |
1802 value = value.decode(encoding) | |
1803 | |
1804 return value | |
1805 | |
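# Editor's note -- illustrative sketch (hypothetical key name): without an
# encoding the raw bytes are returned; passing encoding decodes them to str.
#
#   raw = bucket.get_key('notes/readme.txt').get_contents_as_string()
#   text = bucket.get_key('notes/readme.txt').get_contents_as_string(
#       encoding='utf-8')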
1806 def add_email_grant(self, permission, email_address, headers=None): | |
1807 """ | |
1808 Convenience method that provides a quick way to add an email grant | |
1809 to a key. This method retrieves the current ACL, creates a new | |
1810 grant based on the parameters passed in, adds that grant to the ACL | |
1811 and then PUT's the new ACL back to S3. | |
1812 | |
1813 :type permission: string | |
1814 :param permission: The permission being granted. Should be one of: | |
1815 (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL). | |
1816 | |
1817 :type email_address: string | |
1818 :param email_address: The email address associated with the AWS | |
1819 account you are granting the permission to. | |
1820 | |
1828 """ | |
1829 policy = self.get_acl(headers=headers) | |
1830 policy.acl.add_email_grant(permission, email_address) | |
1831 self.set_acl(policy, headers=headers) | |
1832 | |
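# Editor's note -- illustrative sketch (hypothetical address): grant read
# access to the AWS account registered under an email address; this retrieves
# the key's current ACL, adds the grant, and PUTs the ACL back.
#
#   key.add_email_grant('READ', 'teammate@example.com')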
1833 def add_user_grant(self, permission, user_id, headers=None, | |
1834 display_name=None): | |
1835 """ | |
1836 Convenience method that provides a quick way to add a canonical | |
1837 user grant to a key. This method retrieves the current ACL, | |
1838 creates a new grant based on the parameters passed in, adds that | |
1839 grant to the ACL and then PUT's the new ACL back to S3. | |
1840 | |
1841 :type permission: string | |
1842 :param permission: The permission being granted. Should be one of: | |
1843 (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL). | |
1844 | |
1845 :type user_id: string | |
1846 :param user_id: The canonical user id associated with the AWS | |
1847 account you are granting the permission to. | |
1848 | |
1849 :type display_name: string | |
1850 :param display_name: An optional string containing the user's | |
1851 Display Name. Only required on Walrus. | |
1852 """ | |
1853 policy = self.get_acl(headers=headers) | |
1854 policy.acl.add_user_grant(permission, user_id, | |
1855 display_name=display_name) | |
1856 self.set_acl(policy, headers=headers) | |
1857 | |
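# Editor's note -- illustrative sketch: the canonical user id below is a
# placeholder, not a real account:
#
#   key.add_user_grant('FULL_CONTROL',
#                      '<64-character-canonical-user-id>')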
1858 def _normalize_metadata(self, metadata): | |
1859 if isinstance(metadata, set): | |
1860 norm_metadata = set() | |
1861 for k in metadata: | |
1862 norm_metadata.add(k.lower()) | |
1863 else: | |
1864 norm_metadata = {} | |
1865 for k in metadata: | |
1866 norm_metadata[k.lower()] = metadata[k] | |
1867 return norm_metadata | |
1868 | |
1869 def _get_remote_metadata(self, headers=None): | |
1870 """ | |
1871 Extracts metadata from existing URI into a dict, so we can | |
1872 overwrite/delete from it to form the new set of metadata to apply to a | |
1873 key. | |
1874 """ | |
1875 metadata = {} | |
1876 for underscore_name in self._underscore_base_user_settable_fields: | |
1877 if hasattr(self, underscore_name): | |
1878 value = getattr(self, underscore_name) | |
1879 if value: | |
1880 # Generate HTTP field name corresponding to "_" named field. | |
1881 field_name = underscore_name.replace('_', '-') | |
1882 metadata[field_name.lower()] = value | |
1883 # self.metadata contains custom metadata, which are all user-settable. | |
1884 prefix = self.provider.metadata_prefix | |
1885 for underscore_name in self.metadata: | |
1886 field_name = underscore_name.replace('_', '-') | |
1887 metadata['%s%s' % (prefix, field_name.lower())] = ( | |
1888 self.metadata[underscore_name]) | |
1889 return metadata | |
1890 | |
1891 def set_remote_metadata(self, metadata_plus, metadata_minus, preserve_acl, | |
1892 headers=None): | |
1893 metadata_plus = self._normalize_metadata(metadata_plus) | |
1894 metadata_minus = self._normalize_metadata(metadata_minus) | |
1895 metadata = self._get_remote_metadata() | |
1896 metadata.update(metadata_plus) | |
1897 for h in metadata_minus: | |
1898 if h in metadata: | |
1899 del metadata[h] | |
1900 src_bucket = self.bucket | |
1901 # Boto prepends the meta prefix when adding headers, so strip prefix in | |
1902 # metadata before sending back in to copy_key() call. | |
1903 rewritten_metadata = {} | |
1904 for h in metadata: | |
1905 if (h.startswith('x-goog-meta-') or h.startswith('x-amz-meta-')): | |
1906 rewritten_h = (h.replace('x-goog-meta-', '') | |
1907 .replace('x-amz-meta-', '')) | |
1908 else: | |
1909 rewritten_h = h | |
1910 rewritten_metadata[rewritten_h] = metadata[h] | |
1911 metadata = rewritten_metadata | |
1912 src_bucket.copy_key(self.name, self.bucket.name, self.name, | |
1913 metadata=metadata, preserve_acl=preserve_acl, | |
1914 headers=headers) | |
1915 | |
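# Editor's note -- illustrative sketch: set_remote_metadata rewrites the key's
# metadata by copying the object onto itself; metadata_plus adds or overwrites
# entries and metadata_minus (a dict or set of header names) removes them.
# The metadata names below are hypothetical:
#
#   key.set_remote_metadata({'x-amz-meta-project': 'guppy'},
#                           set(['x-amz-meta-obsolete']),
#                           preserve_acl=True)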
1916 def restore(self, days, headers=None): | |
1917 """Restore an object from an archive. | |
1918 | |
1919 :type days: int | |
1920 :param days: The lifetime of the restored object (must | |
1921 be at least 1 day). If the object is already restored | |
1922 then this parameter can be used to readjust the lifetime | |
1923 of the restored object. In this case, the days | |
1924 param is with respect to the initial time of the request. | |
1925 If the object has not been restored, this param is with | |
1926 respect to the completion time of the request. | |
1927 | |
1928 """ | |
1929 response = self.bucket.connection.make_request( | |
1930 'POST', self.bucket.name, self.name, | |
1931 data=self.RestoreBody % days, | |
1932 headers=headers, query_args='restore') | |
1933 if response.status not in (200, 202): | |
1934 provider = self.bucket.connection.provider | |
1935 raise provider.storage_response_error(response.status, | |
1936 response.reason, | |
1937 response.read()) |
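# Editor's note -- illustrative sketch: request that an archived (for example,
# Glacier-class) object be restored for a week; a provider storage response
# error is raised if S3 does not answer with 200 or 202.
#
#   key.restore(days=7)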