diff env/lib/python3.7/site-packages/boto/s3/key.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author   | shellac
---------|--------
date     | Mon, 01 Jun 2020 08:59:25 -0400
parents  | 79f47841a781
children | (none)
--- a/env/lib/python3.7/site-packages/boto/s3/key.py	Thu May 14 16:47:39 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1937 +0,0 @@
-# Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/
-# Copyright (c) 2011, Nexenta Systems Inc.
-# Copyright (c) 2012 Amazon.com, Inc. or its affiliates.  All Rights Reserved
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish, dis-
-# tribute, sublicense, and/or sell copies of the Software, and to permit
-# persons to whom the Software is furnished to do so, subject to the fol-
-# lowing conditions:
-#
-# The above copyright notice and this permission notice shall be included
-# in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
-# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
-# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-import email.utils
-import errno
-import hashlib
-import mimetypes
-import os
-import re
-import base64
-import binascii
-import math
-from hashlib import md5
-import boto.utils
-from boto.compat import BytesIO, six, urllib, encodebytes
-
-from boto.exception import BotoClientError
-from boto.exception import StorageDataError
-from boto.exception import PleaseRetryException
-from boto.provider import Provider
-from boto.s3.keyfile import KeyFile
-from boto.s3.user import User
-from boto import UserAgent
-from boto.utils import compute_md5, compute_hash
-from boto.utils import find_matching_headers
-from boto.utils import merge_headers_by_name
-
-
-class Key(object):
-    """
-    Represents a key (object) in an S3 bucket.
-
-    :ivar bucket: The parent :class:`boto.s3.bucket.Bucket`.
-    :ivar name: The name of this Key object.
-    :ivar metadata: A dictionary containing user metadata that you
-        wish to store with the object or that has been retrieved from
-        an existing object.
-    :ivar cache_control: The value of the `Cache-Control` HTTP header.
-    :ivar content_type: The value of the `Content-Type` HTTP header.
-    :ivar content_encoding: The value of the `Content-Encoding` HTTP header.
-    :ivar content_disposition: The value of the `Content-Disposition` HTTP
-        header.
-    :ivar content_language: The value of the `Content-Language` HTTP header.
-    :ivar etag: The `etag` associated with this object.
-    :ivar last_modified: The string timestamp representing the last
-        time this object was modified in S3.
-    :ivar owner: The ID of the owner of this object.
-    :ivar storage_class: The storage class of the object.  Currently, one of:
-        STANDARD | REDUCED_REDUNDANCY | GLACIER
-    :ivar md5: The MD5 hash of the contents of the object.
-    :ivar size: The size, in bytes, of the object.
-    :ivar version_id: The version ID of this object, if it is a versioned
-        object.
-    :ivar encrypted: Whether the object is encrypted while at rest on
-        the server.
- """ - - DefaultContentType = 'application/octet-stream' - - RestoreBody = """<?xml version="1.0" encoding="UTF-8"?> - <RestoreRequest xmlns="http://s3.amazonaws.com/doc/2006-03-01"> - <Days>%s</Days> - </RestoreRequest>""" - - - BufferSize = boto.config.getint('Boto', 'key_buffer_size', 8192) - - # The object metadata fields a user can set, other than custom metadata - # fields (i.e., those beginning with a provider-specific prefix like - # x-amz-meta). - base_user_settable_fields = set(["cache-control", "content-disposition", - "content-encoding", "content-language", - "content-md5", "content-type", - "x-robots-tag", "expires"]) - _underscore_base_user_settable_fields = set() - for f in base_user_settable_fields: - _underscore_base_user_settable_fields.add(f.replace('-', '_')) - # Metadata fields, whether user-settable or not, other than custom - # metadata fields (i.e., those beginning with a provider specific prefix - # like x-amz-meta). - base_fields = (base_user_settable_fields | - set(["last-modified", "content-length", "date", "etag"])) - - - - def __init__(self, bucket=None, name=None): - self.bucket = bucket - self.name = name - self.metadata = {} - self.cache_control = None - self.content_type = self.DefaultContentType - self.content_encoding = None - self.content_disposition = None - self.content_language = None - self.filename = None - self.etag = None - self.is_latest = False - self.last_modified = None - self.owner = None - self._storage_class = None - self.path = None - self.resp = None - self.mode = None - self.size = None - self.version_id = None - self.source_version_id = None - self.delete_marker = False - self.encrypted = None - # If the object is being restored, this attribute will be set to True. - # If the object is restored, it will be set to False. Otherwise this - # value will be None. If the restore is completed (ongoing_restore = - # False), the expiry_date will be populated with the expiry date of the - # restored object. 
-
-    def __repr__(self):
-        if self.bucket:
-            name = u'<Key: %s,%s>' % (self.bucket.name, self.name)
-        else:
-            name = u'<Key: None,%s>' % self.name
-
-        # Encode to bytes for Python 2 to prevent display decoding issues
-        if not isinstance(name, str):
-            name = name.encode('utf-8')
-
-        return name
-
-    def __iter__(self):
-        return self
-
-    @property
-    def provider(self):
-        provider = None
-        if self.bucket and self.bucket.connection:
-            provider = self.bucket.connection.provider
-        return provider
-
-    def _get_key(self):
-        return self.name
-
-    def _set_key(self, value):
-        self.name = value
-
-    key = property(_get_key, _set_key)
-
-    def _get_md5(self):
-        if 'md5' in self.local_hashes and self.local_hashes['md5']:
-            return binascii.b2a_hex(self.local_hashes['md5'])
-
-    def _set_md5(self, value):
-        if value:
-            self.local_hashes['md5'] = binascii.a2b_hex(value)
-        elif 'md5' in self.local_hashes:
-            self.local_hashes.pop('md5', None)
-
-    md5 = property(_get_md5, _set_md5)
-
-    def _get_base64md5(self):
-        if 'md5' in self.local_hashes and self.local_hashes['md5']:
-            md5 = self.local_hashes['md5']
-            if not isinstance(md5, bytes):
-                md5 = md5.encode('utf-8')
-            return binascii.b2a_base64(md5).decode('utf-8').rstrip('\n')
-
-    def _set_base64md5(self, value):
-        if value:
-            if not isinstance(value, six.string_types):
-                value = value.decode('utf-8')
-            self.local_hashes['md5'] = binascii.a2b_base64(value)
-        elif 'md5' in self.local_hashes:
-            del self.local_hashes['md5']
-
-    base64md5 = property(_get_base64md5, _set_base64md5)
-
-    def _get_storage_class(self):
-        if self._storage_class is None and self.bucket:
-            # Attempt to fetch storage class
-            list_items = list(self.bucket.list(self.name.encode('utf-8')))
-            if len(list_items) and getattr(list_items[0], '_storage_class',
-                                           None):
-                self._storage_class = list_items[0]._storage_class
-            else:
-                # Key is not yet saved? Just use default...
-                self._storage_class = 'STANDARD'
-
-        return self._storage_class
-
-    def _set_storage_class(self, value):
-        self._storage_class = value
-
-    storage_class = property(_get_storage_class, _set_storage_class)
-
-    def get_md5_from_hexdigest(self, md5_hexdigest):
-        """
-        A utility function to create the 2-tuple (md5hexdigest, base64md5)
-        from just having a precalculated md5_hexdigest.
-        """
-        digest = binascii.unhexlify(md5_hexdigest)
-        base64md5 = encodebytes(digest)
-        if base64md5[-1] == '\n':
-            base64md5 = base64md5[0:-1]
-        return (md5_hexdigest, base64md5)
-
-    def handle_encryption_headers(self, resp):
-        provider = self.bucket.connection.provider
-        if provider.server_side_encryption_header:
-            self.encrypted = resp.getheader(
-                provider.server_side_encryption_header, None)
-        else:
-            self.encrypted = None
-
-    def handle_storage_class_header(self, resp):
-        provider = self.bucket.connection.provider
-        if provider.storage_class_header:
-            self._storage_class = resp.getheader(
-                provider.storage_class_header, None)
-            if (self._storage_class is None and
-                    provider.get_provider_name() == 'aws'):
-                # S3 docs for HEAD object requests say S3 will return this
-                # header for all objects except Standard storage class objects.
-                self._storage_class = 'STANDARD'
-
-    def handle_version_headers(self, resp, force=False):
-        provider = self.bucket.connection.provider
-        # If the Key object already has a version_id attribute value, it
-        # means that it represents an explicit version and the user is
-        # doing a get_contents_*(version_id=<foo>) to retrieve another
-        # version of the Key.  In that case, we don't really want to
-        # overwrite the version_id in this Key object.  Comprende?
-        if self.version_id is None or force:
-            self.version_id = resp.getheader(provider.version_id, None)
-        self.source_version_id = resp.getheader(provider.copy_source_version_id,
-                                                None)
-        if resp.getheader(provider.delete_marker, 'false') == 'true':
-            self.delete_marker = True
-        else:
-            self.delete_marker = False
-
-    def handle_restore_headers(self, response):
-        provider = self.bucket.connection.provider
-        header = response.getheader(provider.restore_header)
-        if header is None:
-            return
-        parts = header.split(',', 1)
-        for part in parts:
-            key, val = [i.strip() for i in part.split('=')]
-            val = val.replace('"', '')
-            if key == 'ongoing-request':
-                self.ongoing_restore = True if val.lower() == 'true' else False
-            elif key == 'expiry-date':
-                self.expiry_date = val
-
-    def handle_addl_headers(self, headers):
-        """
-        Used by Key subclasses to do additional, provider-specific
-        processing of response headers. No-op for this base class.
-        """
-        pass
-
-    def open_read(self, headers=None, query_args='',
-                  override_num_retries=None, response_headers=None):
-        """
-        Open this key for reading
-
-        :type headers: dict
-        :param headers: Headers to pass in the web request
-
-        :type query_args: string
-        :param query_args: Arguments to pass in the query string
-            (ie, 'torrent')
-
-        :type override_num_retries: int
-        :param override_num_retries: If not None will override configured
-            num_retries parameter for underlying GET.
-
-        :type response_headers: dict
-        :param response_headers: A dictionary containing HTTP
-            headers/values that will override any headers associated
-            with the stored object in the response.  See
-            http://goo.gl/EWOPb for details.
-        """
-        if self.resp is None:
-            self.mode = 'r'
-
-            provider = self.bucket.connection.provider
-            self.resp = self.bucket.connection.make_request(
-                'GET', self.bucket.name, self.name, headers,
-                query_args=query_args,
-                override_num_retries=override_num_retries)
-            if self.resp.status < 199 or self.resp.status > 299:
-                body = self.resp.read()
-                raise provider.storage_response_error(self.resp.status,
-                                                      self.resp.reason, body)
-            response_headers = self.resp.msg
-            self.metadata = boto.utils.get_aws_metadata(response_headers,
-                                                        provider)
-            for name, value in response_headers.items():
-                # To get correct size for Range GETs, use Content-Range
-                # header if one was returned. If not, use Content-Length
-                # header.
-                if (name.lower() == 'content-length' and
-                        'Content-Range' not in response_headers):
-                    self.size = int(value)
-                elif name.lower() == 'content-range':
-                    end_range = re.sub('.*/(.*)', '\\1', value)
-                    self.size = int(end_range)
-                elif name.lower() in Key.base_fields:
-                    self.__dict__[name.lower().replace('-', '_')] = value
-            self.handle_version_headers(self.resp)
-            self.handle_encryption_headers(self.resp)
-            self.handle_restore_headers(self.resp)
-            self.handle_addl_headers(self.resp.getheaders())
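
Because `open_read` attaches the key to an open HTTP response and `next`/`__next__` read it in `BufferSize` chunks, an object can be streamed without loading it all into memory. A sketch, again with hypothetical names:

    import boto

    key = boto.connect_s3().get_bucket('example-bucket').get_key('big/object.bin')
    with open('/tmp/object.bin', 'wb') as out:
        for chunk in key:          # iterates via open_read()/next()
            out.write(chunk)
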
-
-    def open_write(self, headers=None, override_num_retries=None):
-        """
-        Open this key for writing.
-        Not yet implemented
-
-        :type headers: dict
-        :param headers: Headers to pass in the write request
-
-        :type override_num_retries: int
-        :param override_num_retries: If not None will override configured
-            num_retries parameter for underlying PUT.
-        """
-        raise BotoClientError('Not Implemented')
-
-    def open(self, mode='r', headers=None, query_args=None,
-             override_num_retries=None):
-        if mode == 'r':
-            self.mode = 'r'
-            self.open_read(headers=headers, query_args=query_args,
-                           override_num_retries=override_num_retries)
-        elif mode == 'w':
-            self.mode = 'w'
-            self.open_write(headers=headers,
-                            override_num_retries=override_num_retries)
-        else:
-            raise BotoClientError('Invalid mode: %s' % mode)
-
-    closed = False
-
-    def close(self, fast=False):
-        """
-        Close this key.
-
-        :type fast: bool
-        :param fast: True if you want the connection to be closed without first
-            reading the content. This should only be used in cases where subsequent
-            calls don't need to return the content from the open HTTP connection.
-            Note: As explained at
-            http://docs.python.org/2/library/httplib.html#httplib.HTTPConnection.getresponse,
-            callers must read the whole response before sending a new request to the
-            server. Calling Key.close(fast=True) and making a subsequent request to
-            the server will work because boto will get an httplib exception and
-            close/reopen the connection.
-
-        """
-        if self.resp and not fast:
-            self.resp.read()
-        self.resp = None
-        self.mode = None
-        self.closed = True
-
-    def next(self):
-        """
-        By providing a next method, the key object supports use as an iterator.
-        For example, you can now say:
-
-        for bytes in key:
-            write bytes to a file or whatever
-
-        All of the HTTP connection stuff is handled for you.
-        """
-        self.open_read()
-        data = self.resp.read(self.BufferSize)
-        if not data:
-            self.close()
-            raise StopIteration
-        return data
-
-    # Python 3 iterator support
-    __next__ = next
-
-    def read(self, size=0):
-        self.open_read()
-        if size == 0:
-            data = self.resp.read()
-        else:
-            data = self.resp.read(size)
-        if not data:
-            self.close()
-        return data
- """ - bucket_name = dst_bucket or self.bucket.name - if new_storage_class == 'STANDARD': - return self.copy(bucket_name, self.name, - reduced_redundancy=False, preserve_acl=True, - validate_dst_bucket=validate_dst_bucket) - elif new_storage_class == 'REDUCED_REDUNDANCY': - return self.copy(bucket_name, self.name, - reduced_redundancy=True, preserve_acl=True, - validate_dst_bucket=validate_dst_bucket) - else: - raise BotoClientError('Invalid storage class: %s' % - new_storage_class) - - def copy(self, dst_bucket, dst_key, metadata=None, - reduced_redundancy=False, preserve_acl=False, - encrypt_key=False, validate_dst_bucket=True): - """ - Copy this Key to another bucket. - - :type dst_bucket: string - :param dst_bucket: The name of the destination bucket - - :type dst_key: string - :param dst_key: The name of the destination key - - :type metadata: dict - :param metadata: Metadata to be associated with new key. If - metadata is supplied, it will replace the metadata of the - source key being copied. If no metadata is supplied, the - source key's metadata will be copied to the new key. - - :type reduced_redundancy: bool - :param reduced_redundancy: If True, this will force the - storage class of the new Key to be REDUCED_REDUNDANCY - regardless of the storage class of the key being copied. - The Reduced Redundancy Storage (RRS) feature of S3, - provides lower redundancy at lower storage cost. - - :type preserve_acl: bool - :param preserve_acl: If True, the ACL from the source key will - be copied to the destination key. If False, the - destination key will have the default ACL. Note that - preserving the ACL in the new key object will require two - additional API calls to S3, one to retrieve the current - ACL and one to set that ACL on the new object. If you - don't care about the ACL, a value of False will be - significantly more efficient. - - :type encrypt_key: bool - :param encrypt_key: If True, the new copy of the object will - be encrypted on the server-side by S3 and will be stored - in an encrypted form while at rest in S3. - - :type validate_dst_bucket: bool - :param validate_dst_bucket: If True, will validate the dst_bucket - by using an extra list request. 
-
-    def copy(self, dst_bucket, dst_key, metadata=None,
-             reduced_redundancy=False, preserve_acl=False,
-             encrypt_key=False, validate_dst_bucket=True):
-        """
-        Copy this Key to another bucket.
-
-        :type dst_bucket: string
-        :param dst_bucket: The name of the destination bucket
-
-        :type dst_key: string
-        :param dst_key: The name of the destination key
-
-        :type metadata: dict
-        :param metadata: Metadata to be associated with new key.  If
-            metadata is supplied, it will replace the metadata of the
-            source key being copied.  If no metadata is supplied, the
-            source key's metadata will be copied to the new key.
-
-        :type reduced_redundancy: bool
-        :param reduced_redundancy: If True, this will force the
-            storage class of the new Key to be REDUCED_REDUNDANCY
-            regardless of the storage class of the key being copied.
-            The Reduced Redundancy Storage (RRS) feature of S3
-            provides lower redundancy at lower storage cost.
-
-        :type preserve_acl: bool
-        :param preserve_acl: If True, the ACL from the source key will
-            be copied to the destination key.  If False, the
-            destination key will have the default ACL.  Note that
-            preserving the ACL in the new key object will require two
-            additional API calls to S3, one to retrieve the current
-            ACL and one to set that ACL on the new object.  If you
-            don't care about the ACL, a value of False will be
-            significantly more efficient.
-
-        :type encrypt_key: bool
-        :param encrypt_key: If True, the new copy of the object will
-            be encrypted on the server-side by S3 and will be stored
-            in an encrypted form while at rest in S3.
-
-        :type validate_dst_bucket: bool
-        :param validate_dst_bucket: If True, will validate the dst_bucket
-            by using an extra list request.
-
-        :rtype: :class:`boto.s3.key.Key` or subclass
-        :returns: An instance of the newly created key object
-        """
-        dst_bucket = self.bucket.connection.lookup(dst_bucket,
-                                                   validate_dst_bucket)
-        if reduced_redundancy:
-            storage_class = 'REDUCED_REDUNDANCY'
-        else:
-            storage_class = self.storage_class
-        return dst_bucket.copy_key(dst_key, self.bucket.name,
-                                   self.name, metadata,
-                                   storage_class=storage_class,
-                                   preserve_acl=preserve_acl,
-                                   encrypt_key=encrypt_key,
-                                   src_version_id=self.version_id)
-
-    def startElement(self, name, attrs, connection):
-        if name == 'Owner':
-            self.owner = User(self)
-            return self.owner
-        else:
-            return None
-
-    def endElement(self, name, value, connection):
-        if name == 'Key':
-            self.name = value
-        elif name == 'ETag':
-            self.etag = value
-        elif name == 'IsLatest':
-            if value == 'true':
-                self.is_latest = True
-            else:
-                self.is_latest = False
-        elif name == 'LastModified':
-            self.last_modified = value
-        elif name == 'Size':
-            self.size = int(value)
-        elif name == 'StorageClass':
-            self.storage_class = value
-        elif name == 'Owner':
-            pass
-        elif name == 'VersionId':
-            self.version_id = value
-        else:
-            setattr(self, name, value)
-
-    def exists(self, headers=None):
-        """
-        Returns True if the key exists
-
-        :rtype: bool
-        :return: Whether the key exists on S3
-        """
-        return bool(self.bucket.lookup(self.name, headers=headers))
-
-    def delete(self, headers=None):
-        """
-        Delete this key from S3
-        """
-        return self.bucket.delete_key(self.name, version_id=self.version_id,
-                                      headers=headers)
-
-    def get_metadata(self, name):
-        return self.metadata.get(name)
-
-    def set_metadata(self, name, value):
-        # Ensure that metadata that is vital to signing is in the correct
-        # case.  Applies to ``Content-Type`` & ``Content-MD5``.
-        if name.lower() == 'content-type':
-            self.metadata['Content-Type'] = value
-        elif name.lower() == 'content-md5':
-            self.metadata['Content-MD5'] = value
-        else:
-            self.metadata[name] = value
-        if name.lower() in Key.base_user_settable_fields:
-            self.__dict__[name.lower().replace('-', '_')] = value
-
-    def update_metadata(self, d):
-        self.metadata.update(d)
-
-    # convenience methods for setting/getting ACL
-    def set_acl(self, acl_str, headers=None):
-        if self.bucket is not None:
-            self.bucket.set_acl(acl_str, self.name, headers=headers)
-
-    def get_acl(self, headers=None):
-        if self.bucket is not None:
-            return self.bucket.get_acl(self.name, headers=headers)
-
-    def get_xml_acl(self, headers=None):
-        if self.bucket is not None:
-            return self.bucket.get_xml_acl(self.name, headers=headers)
-
-    def set_xml_acl(self, acl_str, headers=None):
-        if self.bucket is not None:
-            return self.bucket.set_xml_acl(acl_str, self.name, headers=headers)
-
-    def set_canned_acl(self, acl_str, headers=None):
-        return self.bucket.set_canned_acl(acl_str, self.name, headers)
-
-    def get_redirect(self):
-        """Return the redirect location configured for this key.
-
-        If no redirect is configured (via set_redirect), then None
-        will be returned.
-
-        """
-        response = self.bucket.connection.make_request(
-            'HEAD', self.bucket.name, self.name)
-        if response.status == 200:
-            return response.getheader('x-amz-website-redirect-location')
-        else:
-            raise self.provider.storage_response_error(
-                response.status, response.reason, response.read())
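
A sketch of a server-side copy that keeps the source ACL, with hypothetical bucket and key names; deleting the source afterwards turns the copy into a move:

    import boto

    src = boto.connect_s3().get_bucket('example-bucket').get_key('data/input.csv')
    dst = src.copy('example-archive', 'data/input.csv', preserve_acl=True)
    src.delete()    # optional: remove the original
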
-
-    def set_redirect(self, redirect_location, headers=None):
-        """Configure this key to redirect to another location.
-
-        When the bucket associated with this key is accessed from the website
-        endpoint, a 301 redirect will be issued to the specified
-        `redirect_location`.
-
-        :type redirect_location: string
-        :param redirect_location: The location to redirect.
-
-        """
-        if headers is None:
-            headers = {}
-        else:
-            headers = headers.copy()
-
-        headers['x-amz-website-redirect-location'] = redirect_location
-        response = self.bucket.connection.make_request('PUT', self.bucket.name,
-                                                       self.name, headers)
-        if response.status == 200:
-            return True
-        else:
-            raise self.provider.storage_response_error(
-                response.status, response.reason, response.read())
-
-    def make_public(self, headers=None):
-        return self.bucket.set_canned_acl('public-read', self.name, headers)
-
-    def generate_url(self, expires_in, method='GET', headers=None,
-                     query_auth=True, force_http=False, response_headers=None,
-                     expires_in_absolute=False, version_id=None,
-                     policy=None, reduced_redundancy=False, encrypt_key=False):
-        """
-        Generate a URL to access this key.
-
-        :type expires_in: int
-        :param expires_in: How long the url is valid for, in seconds.
-
-        :type method: string
-        :param method: The method to use for retrieving the file
-            (default is GET).
-
-        :type headers: dict
-        :param headers: Any headers to pass along in the request.
-
-        :type query_auth: bool
-        :param query_auth: If True, signs the request in the URL.
-
-        :type force_http: bool
-        :param force_http: If True, http will be used instead of https.
-
-        :type response_headers: dict
-        :param response_headers: A dictionary containing HTTP
-            headers/values that will override any headers associated
-            with the stored object in the response.  See
-            http://goo.gl/EWOPb for details.
-
-        :type expires_in_absolute: bool
-        :param expires_in_absolute:
-
-        :type version_id: string
-        :param version_id: The version_id of the object to GET. If specified
-            this overrides any value in the key.
-
-        :type policy: :class:`boto.s3.acl.CannedACLStrings`
-        :param policy: A canned ACL policy that will be applied to the
-            new key in S3.
-
-        :type reduced_redundancy: bool
-        :param reduced_redundancy: If True, this will set the storage
-            class of the new Key to be REDUCED_REDUNDANCY. The Reduced
-            Redundancy Storage (RRS) feature of S3 provides lower
-            redundancy at lower storage cost.
-
-        :type encrypt_key: bool
-        :param encrypt_key: If True, the new copy of the object will
-            be encrypted on the server-side by S3 and will be stored
-            in an encrypted form while at rest in S3.
-
-        :rtype: string
-        :return: The URL to access the key
-        """
-        provider = self.bucket.connection.provider
-        version_id = version_id or self.version_id
-        if headers is None:
-            headers = {}
-        else:
-            headers = headers.copy()
-
-        # add headers accordingly (usually PUT case)
-        if policy:
-            headers[provider.acl_header] = policy
-        if reduced_redundancy:
-            self.storage_class = 'REDUCED_REDUNDANCY'
-            if provider.storage_class_header:
-                headers[provider.storage_class_header] = self.storage_class
-        if encrypt_key:
-            headers[provider.server_side_encryption_header] = 'AES256'
-        headers = boto.utils.merge_meta(headers, self.metadata, provider)
-
-        return self.bucket.connection.generate_url(expires_in, method,
-                                                   self.bucket.name, self.name,
-                                                   headers, query_auth,
-                                                   force_http,
-                                                   response_headers,
-                                                   expires_in_absolute,
-                                                   version_id)
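
A sketch generating a time-limited, query-signed URL for a private object (names hypothetical); the URL can be handed to clients that have no AWS credentials:

    import boto

    key = boto.connect_s3().get_bucket('example-bucket').get_key('private/report.pdf')
    url = key.generate_url(300)    # signed GET URL, valid for 5 minutes
    print(url)
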
-
-    def send_file(self, fp, headers=None, cb=None, num_cb=10,
-                  query_args=None, chunked_transfer=False, size=None):
-        """
-        Upload a file to a key into a bucket on S3.
-
-        :type fp: file
-        :param fp: The file pointer to upload. The file pointer must
-            point at the offset from which you wish to upload.
-            ie. if uploading the full file, it should point at the
-            start of the file. Normally when a file is opened for
-            reading, the fp will point at the first byte.  See the
-            bytes parameter below for more info.
-
-        :type headers: dict
-        :param headers: The headers to pass along with the PUT request
-
-        :type num_cb: int
-        :param num_cb: (optional) If a callback is specified with the
-            cb parameter this parameter determines the granularity of
-            the callback by defining the maximum number of times the
-            callback will be called during the file
-            transfer. Providing a negative integer will cause your
-            callback to be called with each buffer read.
-
-        :type query_args: string
-        :param query_args: (optional) Arguments to pass in the query string.
-
-        :type chunked_transfer: boolean
-        :param chunked_transfer: (optional) If true, we use chunked
-            Transfer-Encoding.
-
-        :type size: int
-        :param size: (optional) The maximum number of bytes to read
-            from the file pointer (fp). This is useful when uploading
-            a file in multiple parts where you are splitting the file
-            up into different ranges to be uploaded. If not specified,
-            the default behaviour is to read all bytes from the file
-            pointer. Fewer bytes may be available.
-        """
-        self._send_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb,
-                                 query_args=query_args,
-                                 chunked_transfer=chunked_transfer, size=size)
-
-    def _send_file_internal(self, fp, headers=None, cb=None, num_cb=10,
-                            query_args=None, chunked_transfer=False, size=None,
-                            hash_algs=None):
-        provider = self.bucket.connection.provider
-        try:
-            spos = fp.tell()
-        except IOError:
-            spos = None
-            self.read_from_stream = False
-
-        # If hash_algs is unset and the MD5 hasn't already been computed,
-        # default to an MD5 hash_alg to hash the data on-the-fly.
-        if hash_algs is None and not self.md5:
-            hash_algs = {'md5': md5}
-        digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})
-
-        def sender(http_conn, method, path, data, headers):
-            # This function is called repeatedly for temporary retries
-            # so we must be sure the file pointer is pointing at the
-            # start of the data.
-            if spos is not None and spos != fp.tell():
-                fp.seek(spos)
-            elif spos is None and self.read_from_stream:
-                # if seek is not supported, and we've read from this
-                # stream already, then we need to abort retries to
-                # avoid setting bad data.
-                raise provider.storage_data_error(
-                    'Cannot retry failed request. fp does not support seeking.')
-
-            # If the caller explicitly specified host header, tell putrequest
-            # not to add a second host header. Similarly for accept-encoding.
-            skips = {}
-            if boto.utils.find_matching_headers('host', headers):
-                skips['skip_host'] = 1
-            if boto.utils.find_matching_headers('accept-encoding', headers):
-                skips['skip_accept_encoding'] = 1
-            http_conn.putrequest(method, path, **skips)
-            for key in headers:
-                http_conn.putheader(key, headers[key])
-            http_conn.endheaders()
-
-            save_debug = self.bucket.connection.debug
-            self.bucket.connection.debug = 0
-            # If the debuglevel < 4 we don't want to show connection
-            # payload, so turn off HTTP connection-level debug output (to
-            # be restored below).
-            # Use the getattr approach to allow this to work in AppEngine.
-            if getattr(http_conn, 'debuglevel', 0) < 4:
-                http_conn.set_debuglevel(0)
-
-            data_len = 0
-            if cb:
-                if size:
-                    cb_size = size
-                elif self.size:
-                    cb_size = self.size
-                else:
-                    cb_size = 0
-                if chunked_transfer and cb_size == 0:
-                    # For chunked Transfer, we call the cb for every 1MB
-                    # of data transferred, except when we know size.
-                    cb_count = (1024 * 1024) / self.BufferSize
-                elif num_cb > 1:
-                    cb_count = int(
-                        math.ceil(cb_size / self.BufferSize / (num_cb - 1.0)))
-                elif num_cb < 0:
-                    cb_count = -1
-                else:
-                    cb_count = 0
-                i = 0
-                cb(data_len, cb_size)
-
-            bytes_togo = size
-            if bytes_togo and bytes_togo < self.BufferSize:
-                chunk = fp.read(bytes_togo)
-            else:
-                chunk = fp.read(self.BufferSize)
-
-            if not isinstance(chunk, bytes):
-                chunk = chunk.encode('utf-8')
-
-            if spos is None:
-                # read at least something from a non-seekable fp.
-                self.read_from_stream = True
-            while chunk:
-                chunk_len = len(chunk)
-                data_len += chunk_len
-                if chunked_transfer:
-                    http_conn.send('%x;\r\n' % chunk_len)
-                    http_conn.send(chunk)
-                    http_conn.send('\r\n')
-                else:
-                    http_conn.send(chunk)
-                for alg in digesters:
-                    digesters[alg].update(chunk)
-                if bytes_togo:
-                    bytes_togo -= chunk_len
-                    if bytes_togo <= 0:
-                        break
-                if cb:
-                    i += 1
-                    if i == cb_count or cb_count == -1:
-                        cb(data_len, cb_size)
-                        i = 0
-                if bytes_togo and bytes_togo < self.BufferSize:
-                    chunk = fp.read(bytes_togo)
-                else:
-                    chunk = fp.read(self.BufferSize)
-
-                if not isinstance(chunk, bytes):
-                    chunk = chunk.encode('utf-8')
-
-            self.size = data_len
-
-            for alg in digesters:
-                self.local_hashes[alg] = digesters[alg].digest()
-
-            if chunked_transfer:
-                http_conn.send('0\r\n')
-                # http_conn.send("Content-MD5: %s\r\n" % self.base64md5)
-                http_conn.send('\r\n')
-
-            if cb and (cb_count <= 1 or i > 0) and data_len > 0:
-                cb(data_len, cb_size)
-
-            http_conn.set_debuglevel(save_debug)
-            self.bucket.connection.debug = save_debug
-            response = http_conn.getresponse()
-            body = response.read()
-
-            if not self.should_retry(response, chunked_transfer):
-                raise provider.storage_response_error(
-                    response.status, response.reason, body)
-
-            return response
-
-        if not headers:
-            headers = {}
-        else:
-            headers = headers.copy()
-        # Overwrite user-supplied user-agent.
-        for header in find_matching_headers('User-Agent', headers):
-            del headers[header]
-        headers['User-Agent'] = UserAgent
-        # If storage_class is None, then a user has not explicitly requested
-        # a storage class, so we can assume STANDARD here
-        if self._storage_class not in [None, 'STANDARD']:
-            headers[provider.storage_class_header] = self.storage_class
-        if find_matching_headers('Content-Encoding', headers):
-            self.content_encoding = merge_headers_by_name(
-                'Content-Encoding', headers)
-        if find_matching_headers('Content-Language', headers):
-            self.content_language = merge_headers_by_name(
-                'Content-Language', headers)
-        content_type_headers = find_matching_headers('Content-Type', headers)
-        if content_type_headers:
-            # Some use cases need to suppress sending of the Content-Type
-            # header and depend on the receiving server to set the content
-            # type. This can be achieved by setting headers['Content-Type']
-            # to None when calling this method.
-            if (len(content_type_headers) == 1 and
-                    headers[content_type_headers[0]] is None):
-                # Delete null Content-Type value to skip sending that header.
-                del headers[content_type_headers[0]]
-            else:
-                self.content_type = merge_headers_by_name(
-                    'Content-Type', headers)
-        elif self.path:
-            self.content_type = mimetypes.guess_type(self.path)[0]
-            if self.content_type is None:
-                self.content_type = self.DefaultContentType
-            headers['Content-Type'] = self.content_type
-        else:
-            headers['Content-Type'] = self.content_type
-        if self.base64md5:
-            headers['Content-MD5'] = self.base64md5
-        if chunked_transfer:
-            headers['Transfer-Encoding'] = 'chunked'
-            #if not self.base64md5:
-            #    headers['Trailer'] = "Content-MD5"
-        else:
-            headers['Content-Length'] = str(self.size)
-        # This is terrible. We need a SHA256 of the body for SigV4, but to do
-        # the chunked ``sender`` behavior above, the ``fp`` isn't available to
-        # the auth mechanism (because closures). Detect if it's SigV4 & embellish
-        # while we can before the auth calculations occur.
-        if 'hmac-v4-s3' in self.bucket.connection._required_auth_capability():
-            kwargs = {'fp': fp, 'hash_algorithm': hashlib.sha256}
-            if size is not None:
-                kwargs['size'] = size
-            headers['_sha256'] = compute_hash(**kwargs)[0]
-        headers['Expect'] = '100-Continue'
-        headers = boto.utils.merge_meta(headers, self.metadata, provider)
-        resp = self.bucket.connection.make_request(
-            'PUT',
-            self.bucket.name,
-            self.name,
-            headers,
-            sender=sender,
-            query_args=query_args
-        )
-        self.handle_version_headers(resp, force=True)
-        self.handle_addl_headers(resp.getheaders())
-
-    def should_retry(self, response, chunked_transfer=False):
-        provider = self.bucket.connection.provider
-
-        if not chunked_transfer:
-            if response.status in [500, 503]:
-                # 500 & 503 can be plain retries.
-                return True
-
-            if response.getheader('location'):
-                # If there's a redirect, plain retry.
-                return True
-
-        if 200 <= response.status <= 299:
-            self.etag = response.getheader('etag')
-            md5 = self.md5
-            if isinstance(md5, bytes):
-                md5 = md5.decode('utf-8')
-
-            # If you use customer-provided encryption keys, the ETag value that
-            # Amazon S3 returns in the response will not be the MD5 of the
-            # object.
-            amz_server_side_encryption_customer_algorithm = response.getheader(
-                'x-amz-server-side-encryption-customer-algorithm', None)
-            # The same is applicable for KMS-encrypted objects in gs buckets.
-            goog_customer_managed_encryption = response.getheader(
-                'x-goog-encryption-kms-key-name', None)
-            if (amz_server_side_encryption_customer_algorithm is None and
-                    goog_customer_managed_encryption is None):
-                if self.etag != '"%s"' % md5:
-                    raise provider.storage_data_error(
-                        'ETag from S3 did not match computed MD5. '
-                        '%s vs. %s' % (self.etag, self.md5))
-
-            return True
-
-        if response.status == 400:
-            # The 400 must be trapped so the retry handler can check to
-            # see if it was a timeout.
-            # If ``RequestTimeout`` is present, we'll retry. Otherwise, bomb
-            # out.
-            body = response.read()
-            err = provider.storage_response_error(
-                response.status,
-                response.reason,
-                body
-            )
-
-            if err.error_code in ['RequestTimeout']:
-                raise PleaseRetryException(
-                    "Saw %s, retrying" % err.error_code,
-                    response=response
-                )
-
-        return False
-
-    def compute_md5(self, fp, size=None):
-        """
-        :type fp: file
-        :param fp: File pointer to the file to MD5 hash.  The file
-            pointer will be reset to the same position before the
-            method returns.
-
-        :type size: int
-        :param size: (optional) The maximum number of bytes to read
-            from the file pointer (fp). This is useful when uploading
-            a file in multiple parts where the file is being split
-            in place into different parts. Fewer bytes may be available.
-        """
-        hex_digest, b64_digest, data_size = compute_md5(fp, size=size)
-        # Returned values are MD5 hash, base64 encoded MD5 hash, and data size.
-        # The internal implementation of compute_md5() needs to return the
-        # data size, but we don't want to return that value to the external
-        # caller because it changes the class interface (i.e. it might
-        # break some code), so we consume the third tuple value here and
-        # return the remainder of the tuple to the caller, thereby preserving
-        # the existing interface.
-        self.size = data_size
-        return (hex_digest, b64_digest)
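
When the same data may be uploaded more than once, the MD5 can be computed once and reused: the `md5` parameter of the set_contents_* methods takes the same (hexdigest, base64) tuple this method returns. A sketch with a hypothetical local file:

    import boto
    from boto.s3.key import Key

    bucket = boto.connect_s3().get_bucket('example-bucket')
    key = Key(bucket, 'data/blob.bin')
    with open('blob.bin', 'rb') as fp:
        md5_tuple = key.compute_md5(fp)            # fp position is restored
        key.set_contents_from_file(fp, md5=md5_tuple)
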
-
-    def set_contents_from_stream(self, fp, headers=None, replace=True,
-                                 cb=None, num_cb=10, policy=None,
-                                 reduced_redundancy=False, query_args=None,
-                                 size=None):
-        """
-        Store an object using the name of the Key object as the key in
-        cloud and the contents of the data stream pointed to by 'fp' as
-        the contents.
-
-        The stream object is not seekable and total size is not known.
-        This has the implication that we can't specify the
-        Content-Size and Content-MD5 in the header. So for huge
-        uploads, the delay in calculating MD5 is avoided but with a
-        penalty of inability to verify the integrity of the uploaded
-        data.
-
-        :type fp: file
-        :param fp: the file whose contents are to be uploaded
-
-        :type headers: dict
-        :param headers: additional HTTP headers to be sent with the
-            PUT request.
-
-        :type replace: bool
-        :param replace: If this parameter is False, the method will first check
-            to see if an object exists in the bucket with the same key. If it
-            does, it won't overwrite it. The default value is True which will
-            overwrite the object.
-
-        :type cb: function
-        :param cb: a callback function that will be called to report
-            progress on the upload. The callback should accept two integer
-            parameters, the first representing the number of bytes that have
-            been successfully transmitted to GS and the second representing the
-            total number of bytes that need to be transmitted.
-
-        :type num_cb: int
-        :param num_cb: (optional) If a callback is specified with the
-            cb parameter, this parameter determines the granularity of
-            the callback by defining the maximum number of times the
-            callback will be called during the file transfer.
-
-        :type policy: :class:`boto.gs.acl.CannedACLStrings`
-        :param policy: A canned ACL policy that will be applied to the new key
-            in GS.
-
-        :type reduced_redundancy: bool
-        :param reduced_redundancy: If True, this will set the storage
-            class of the new Key to be REDUCED_REDUNDANCY. The Reduced
-            Redundancy Storage (RRS) feature of S3 provides lower
-            redundancy at lower storage cost.
-
-        :type size: int
-        :param size: (optional) The maximum number of bytes to read from
-            the file pointer (fp). This is useful when uploading a
-            file in multiple parts where you are splitting the file up
-            into different ranges to be uploaded. If not specified,
-            the default behaviour is to read all bytes from the file
-            pointer. Fewer bytes may be available.
-        """
-        provider = self.bucket.connection.provider
-        if not provider.supports_chunked_transfer():
-            raise BotoClientError('%s does not support chunked transfer'
-                                  % provider.get_provider_name())
-
-        # Name of the Object should be specified explicitly for Streams.
-        if not self.name or self.name == '':
-            raise BotoClientError('Cannot determine the destination '
-                                  'object name for the given stream')
-
-        if headers is None:
-            headers = {}
-        if policy:
-            headers[provider.acl_header] = policy
-
-        if reduced_redundancy:
-            self.storage_class = 'REDUCED_REDUNDANCY'
-            if provider.storage_class_header:
-                headers[provider.storage_class_header] = self.storage_class
-
-        if self.bucket is not None:
-            if not replace:
-                if self.bucket.lookup(self.name):
-                    return
-            self.send_file(fp, headers, cb, num_cb, query_args,
-                           chunked_transfer=True, size=size)
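
Note that this path raises unless the provider supports chunked transfer; the docstring above is written in terms of Google Storage. A sketch of streaming from a non-seekable pipe via boto's GS support, with hypothetical names:

    import subprocess
    import boto

    proc = subprocess.Popen(['tar', 'czf', '-', '/var/log'],  # hypothetical producer
                            stdout=subprocess.PIPE)
    bucket = boto.connect_gs().get_bucket('example-bucket')
    key = bucket.new_key('backups/logs.tar.gz')
    key.set_contents_from_stream(proc.stdout)   # chunked Transfer-Encoding, size unknown
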
-
-    def set_contents_from_file(self, fp, headers=None, replace=True,
-                               cb=None, num_cb=10, policy=None, md5=None,
-                               reduced_redundancy=False, query_args=None,
-                               encrypt_key=False, size=None, rewind=False):
-        """
-        Store an object in S3 using the name of the Key object as the
-        key in S3 and the contents of the file pointed to by 'fp' as the
-        contents. The data is read from 'fp' from its current position until
-        'size' bytes have been read or EOF.
-
-        :type fp: file
-        :param fp: the file whose contents to upload
-
-        :type headers: dict
-        :param headers: Additional HTTP headers that will be sent with
-            the PUT request.
-
-        :type replace: bool
-        :param replace: If this parameter is False, the method will
-            first check to see if an object exists in the bucket with
-            the same key.  If it does, it won't overwrite it.  The
-            default value is True which will overwrite the object.
-
-        :type cb: function
-        :param cb: a callback function that will be called to report
-            progress on the upload.  The callback should accept two
-            integer parameters, the first representing the number of
-            bytes that have been successfully transmitted to S3 and
-            the second representing the size of the to be transmitted
-            object.
-
-        :type num_cb: int
-        :param num_cb: (optional) If a callback is specified with the
-            cb parameter this parameter determines the granularity of
-            the callback by defining the maximum number of times the
-            callback will be called during the file transfer.
-
-        :type policy: :class:`boto.s3.acl.CannedACLStrings`
-        :param policy: A canned ACL policy that will be applied to the
-            new key in S3.
-
-        :type md5: A tuple containing the hexdigest version of the MD5
-            checksum of the file as the first element and the
-            Base64-encoded version of the plain checksum as the second
-            element.  This is the same format returned by the
-            compute_md5 method.
-        :param md5: If you need to compute the MD5 for any reason
-            prior to upload, it's silly to have to do it twice so this
-            param, if present, will be used as the MD5 values of the
-            file.  Otherwise, the checksum will be computed.
-
-        :type reduced_redundancy: bool
-        :param reduced_redundancy: If True, this will set the storage
-            class of the new Key to be REDUCED_REDUNDANCY. The Reduced
-            Redundancy Storage (RRS) feature of S3 provides lower
-            redundancy at lower storage cost.
-
-        :type encrypt_key: bool
-        :param encrypt_key: If True, the new copy of the object will
-            be encrypted on the server-side by S3 and will be stored
-            in an encrypted form while at rest in S3.
-
-        :type size: int
-        :param size: (optional) The maximum number of bytes to read
-            from the file pointer (fp). This is useful when uploading
-            a file in multiple parts where you are splitting the file
-            up into different ranges to be uploaded. If not specified,
-            the default behaviour is to read all bytes from the file
-            pointer. Fewer bytes may be available.
-
-        :type rewind: bool
-        :param rewind: (optional) If True, the file pointer (fp) will
-            be rewound to the start before any bytes are read from
-            it.  The default behaviour is False which reads from the
-            current position of the file pointer (fp).
-
-        :rtype: int
-        :return: The number of bytes written to the key.
-        """
-        provider = self.bucket.connection.provider
-        headers = headers or {}
-        if policy:
-            headers[provider.acl_header] = policy
-        if encrypt_key:
-            headers[provider.server_side_encryption_header] = 'AES256'
-
-        if rewind:
-            # caller requests reading from beginning of fp.
-            fp.seek(0, os.SEEK_SET)
-        else:
-            # The following seek/tell/seek logic is intended
-            # to detect applications using the older interface to
-            # set_contents_from_file(), which automatically rewound the
-            # file each time the Key was reused. This changed with commit
-            # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads
-            # split into multiple parts and uploaded in parallel, and at
-            # the time of that commit this check was added because otherwise
-            # older programs would get a success status and upload an empty
-            # object. Unfortunately, it's very inefficient for fp's implemented
-            # by KeyFile (used, for example, by gsutil when copying between
-            # providers). So, we skip the check for the KeyFile case.
-            # TODO: At some point consider removing this seek/tell/seek
-            # logic, after enough time has passed that it's unlikely any
-            # programs remain that assume the older auto-rewind interface.
-            if not isinstance(fp, KeyFile):
-                spos = fp.tell()
-                fp.seek(0, os.SEEK_END)
-                if fp.tell() == spos:
-                    fp.seek(0, os.SEEK_SET)
-                    if fp.tell() != spos:
-                        # Raise an exception as this is likely a programming
-                        # error whereby there is data before the fp but nothing
-                        # after it.
-                        fp.seek(spos)
-                        raise AttributeError('fp is at EOF. Use rewind option '
-                                             'or seek() to data start.')
-                # seek back to the correct position.
-                fp.seek(spos)
-
-        if reduced_redundancy:
-            self.storage_class = 'REDUCED_REDUNDANCY'
-            if provider.storage_class_header:
-                headers[provider.storage_class_header] = self.storage_class
-                # TODO - What if the provider doesn't support reduced redundancy?
-                # What if different providers provide different classes?
-        if hasattr(fp, 'name'):
-            self.path = fp.name
-        if self.bucket is not None:
-            if not md5 and provider.supports_chunked_transfer():
-                # defer md5 calculation to on the fly and
-                # we don't know anything about size yet.
-                chunked_transfer = True
-                self.size = None
-            else:
-                chunked_transfer = False
-                if isinstance(fp, KeyFile):
-                    # Avoid EOF seek for KeyFile case as it's very inefficient.
-                    key = fp.getkey()
-                    size = key.size - fp.tell()
-                    self.size = size
-                    # At present both GCS and S3 use MD5 for the etag for
-                    # non-multipart-uploaded objects. If the etag is 32 hex
-                    # chars use it as an MD5, to avoid having to read the file
-                    # twice while transferring.
-                    if (re.match('^"[a-fA-F0-9]{32}"$', key.etag)):
-                        etag = key.etag.strip('"')
-                        md5 = (etag, base64.b64encode(binascii.unhexlify(etag)))
-                if not md5:
-                    # compute_md5() and also set self.size to actual
-                    # size of the bytes read computing the md5.
-                    md5 = self.compute_md5(fp, size)
-                    # adjust size if required
-                    size = self.size
-                elif size:
-                    self.size = size
-                else:
-                    # If md5 is provided, still need the size so
-                    # calculate based on bytes to end of content
-                    spos = fp.tell()
-                    fp.seek(0, os.SEEK_END)
-                    self.size = fp.tell() - spos
-                    fp.seek(spos)
-                    size = self.size
-            self.md5 = md5[0]
-            self.base64md5 = md5[1]
-
-            if self.name is None:
-                self.name = self.md5
-            if not replace:
-                if self.bucket.lookup(self.name):
-                    return
-
-            self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb,
-                           query_args=query_args,
-                           chunked_transfer=chunked_transfer, size=size)
-            # return number of bytes written.
-            return self.size
-
-    def set_contents_from_filename(self, filename, headers=None, replace=True,
-                                   cb=None, num_cb=10, policy=None, md5=None,
-                                   reduced_redundancy=False,
-                                   encrypt_key=False):
-        """
-        Store an object in S3 using the name of the Key object as the
-        key in S3 and the contents of the file named by 'filename'.
-        See set_contents_from_file method for details about the
-        parameters.
-
-        :type filename: string
-        :param filename: The name of the file that you want to put onto S3
-
-        :type headers: dict
-        :param headers: Additional headers to pass along with the
-            request to AWS.
-
-        :type replace: bool
-        :param replace: If True, replaces the contents of the file
-            if it already exists.
-
-        :type cb: function
-        :param cb: a callback function that will be called to report
-            progress on the upload.  The callback should accept two
-            integer parameters, the first representing the number of
-            bytes that have been successfully transmitted to S3 and
-            the second representing the size of the to be transmitted
-            object.
-
-        :type num_cb: int
-        :param num_cb: (optional) If a callback is specified with the
-            cb parameter this parameter determines the granularity of
-            the callback by defining the maximum number of times the
-            callback will be called during the file transfer.
-
-        :type policy: :class:`boto.s3.acl.CannedACLStrings`
-        :param policy: A canned ACL policy that will be applied to the
-            new key in S3.
-
-        :type md5: A tuple containing the hexdigest version of the MD5
-            checksum of the file as the first element and the
-            Base64-encoded version of the plain checksum as the second
-            element.  This is the same format returned by the
-            compute_md5 method.
-        :param md5: If you need to compute the MD5 for any reason
-            prior to upload, it's silly to have to do it twice so this
-            param, if present, will be used as the MD5 values of the
-            file.  Otherwise, the checksum will be computed.
-
-        :type reduced_redundancy: bool
-        :param reduced_redundancy: If True, this will set the storage
-            class of the new Key to be REDUCED_REDUNDANCY. The Reduced
-            Redundancy Storage (RRS) feature of S3 provides lower
-            redundancy at lower storage cost.
-
-        :type encrypt_key: bool
-        :param encrypt_key: If True, the new copy of the object
-            will be encrypted on the server-side by S3 and will be
-            stored in an encrypted form while at rest in S3.
-
-        :rtype: int
-        :return: The number of bytes written to the key.
-        """
-        with open(filename, 'rb') as fp:
-            return self.set_contents_from_file(fp, headers, replace, cb,
-                                               num_cb, policy, md5,
-                                               reduced_redundancy,
-                                               encrypt_key=encrypt_key)
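
The most common upload path, sketched with hypothetical names:

    import boto
    from boto.s3.key import Key

    key = Key(boto.connect_s3().get_bucket('example-bucket'), 'uploads/photo.jpg')
    nbytes = key.set_contents_from_filename('photo.jpg', encrypt_key=True)
    print('wrote %d bytes, etag %s' % (nbytes, key.etag))
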
-
-    def set_contents_from_string(self, string_data, headers=None, replace=True,
-                                 cb=None, num_cb=10, policy=None, md5=None,
-                                 reduced_redundancy=False,
-                                 encrypt_key=False):
-        """
-        Store an object in S3 using the name of the Key object as the
-        key in S3 and the string 'string_data' as the contents.
-        See set_contents_from_file method for details about the
-        parameters.
-
-        :type headers: dict
-        :param headers: Additional headers to pass along with the
-            request to AWS.
-
-        :type replace: bool
-        :param replace: If True, replaces the contents of the file if
-            it already exists.
-
-        :type cb: function
-        :param cb: a callback function that will be called to report
-            progress on the upload.  The callback should accept two
-            integer parameters, the first representing the number of
-            bytes that have been successfully transmitted to S3 and
-            the second representing the size of the to be transmitted
-            object.
-
-        :type num_cb: int
-        :param num_cb: (optional) If a callback is specified with the
-            cb parameter this parameter determines the granularity of
-            the callback by defining the maximum number of times the
-            callback will be called during the file transfer.
-
-        :type policy: :class:`boto.s3.acl.CannedACLStrings`
-        :param policy: A canned ACL policy that will be applied to the
-            new key in S3.
-
-        :type md5: A tuple containing the hexdigest version of the MD5
-            checksum of the file as the first element and the
-            Base64-encoded version of the plain checksum as the second
-            element.  This is the same format returned by the
-            compute_md5 method.
-        :param md5: If you need to compute the MD5 for any reason
-            prior to upload, it's silly to have to do it twice so this
-            param, if present, will be used as the MD5 values of the
-            file.  Otherwise, the checksum will be computed.
-
-        :type reduced_redundancy: bool
-        :param reduced_redundancy: If True, this will set the storage
-            class of the new Key to be REDUCED_REDUNDANCY. The Reduced
-            Redundancy Storage (RRS) feature of S3 provides lower
-            redundancy at lower storage cost.
-
-        :type encrypt_key: bool
-        :param encrypt_key: If True, the new copy of the object will
-            be encrypted on the server-side by S3 and will be stored
-            in an encrypted form while at rest in S3.
-        """
-        if not isinstance(string_data, bytes):
-            string_data = string_data.encode("utf-8")
-        fp = BytesIO(string_data)
-        r = self.set_contents_from_file(fp, headers, replace, cb, num_cb,
-                                        policy, md5, reduced_redundancy,
-                                        encrypt_key=encrypt_key)
-        fp.close()
-        return r
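
A small round trip through the string helpers (hypothetical bucket); unicode input is encoded to UTF-8 before the PUT, and `get_contents_as_string`, defined further down, decodes on the way back:

    import boto
    from boto.s3.key import Key

    key = Key(boto.connect_s3().get_bucket('example-bucket'), 'notes/hello.txt')
    key.set_contents_from_string(u'hello wörld')
    assert key.get_contents_as_string(encoding='utf-8') == u'hello wörld'
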
-
-    def get_file(self, fp, headers=None, cb=None, num_cb=10,
-                 torrent=False, version_id=None, override_num_retries=None,
-                 response_headers=None):
-        """
-        Retrieves a file from an S3 Key
-
-        :type fp: file
-        :param fp: File pointer to put the data into
-
-        :type headers: dict
-        :param headers: headers to send when retrieving the files
-
-        :type cb: function
-        :param cb: a callback function that will be called to report
-            progress on the download.  The callback should accept two
-            integer parameters, the first representing the number of
-            bytes that have been successfully transmitted from S3 and
-            the second representing the size of the to be transmitted
-            object.
-
-        :type num_cb: int
-        :param num_cb: (optional) If a callback is specified with the
-            cb parameter this parameter determines the granularity of
-            the callback by defining the maximum number of times the
-            callback will be called during the file transfer.
-
-        :type torrent: bool
-        :param torrent: Flag for whether to get a torrent for the file
-
-        :type override_num_retries: int
-        :param override_num_retries: If not None will override configured
-            num_retries parameter for underlying GET.
-
-        :type response_headers: dict
-        :param response_headers: A dictionary containing HTTP
-            headers/values that will override any headers associated
-            with the stored object in the response.  See
-            http://goo.gl/EWOPb for details.
-
-        :type version_id: str
-        :param version_id: The ID of a particular version of the object.
-            If this parameter is not supplied but the Key object has
-            a ``version_id`` attribute, that value will be used when
-            retrieving the object.  You can set the Key object's
-            ``version_id`` attribute to None to always grab the latest
-            version from a version-enabled bucket.
-        """
-        self._get_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb,
-                                torrent=torrent, version_id=version_id,
-                                override_num_retries=override_num_retries,
-                                response_headers=response_headers,
-                                hash_algs=None,
-                                query_args=None)
-
-    def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10,
-                           torrent=False, version_id=None, override_num_retries=None,
-                           response_headers=None, hash_algs=None, query_args=None):
-        if headers is None:
-            headers = {}
-        save_debug = self.bucket.connection.debug
-        if self.bucket.connection.debug == 1:
-            self.bucket.connection.debug = 0
-
-        query_args = query_args or []
-        if torrent:
-            query_args.append('torrent')
-
-        if hash_algs is None and not torrent:
-            hash_algs = {'md5': md5}
-        digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})
-
-        # If a version_id is passed in, use that.  If not, check to see
-        # if the Key object has an explicit version_id and, if so, use that.
-        # Otherwise, don't pass a version_id query param.
-        if version_id is None:
-            version_id = self.version_id
-        if version_id:
-            query_args.append('versionId=%s' % version_id)
-        if response_headers:
-            for key in response_headers:
-                query_args.append('%s=%s' % (
-                    key, urllib.parse.quote(response_headers[key])))
-        query_args = '&'.join(query_args)
-        self.open('r', headers, query_args=query_args,
-                  override_num_retries=override_num_retries)
-
-        data_len = 0
-        if cb:
-            if self.size is None:
-                cb_size = 0
-            else:
-                cb_size = self.size
-            if self.size is None and num_cb != -1:
-                # If size is not available due to chunked transfer for example,
-                # we'll call the cb for every 1MB of data transferred.
-                cb_count = (1024 * 1024) / self.BufferSize
-            elif num_cb > 1:
-                cb_count = int(math.ceil(cb_size / self.BufferSize / (num_cb - 1.0)))
-            elif num_cb < 0:
-                cb_count = -1
-            else:
-                cb_count = 0
-            i = 0
-            cb(data_len, cb_size)
-        try:
-            for bytes in self:
-                fp.write(bytes)
-                data_len += len(bytes)
-                for alg in digesters:
-                    digesters[alg].update(bytes)
-                if cb:
-                    if cb_size > 0 and data_len >= cb_size:
-                        break
-                    i += 1
-                    if i == cb_count or cb_count == -1:
-                        cb(data_len, cb_size)
-                        i = 0
-        except IOError as e:
-            if e.errno == errno.ENOSPC:
-                raise StorageDataError('Out of space for destination file '
-                                       '%s' % fp.name)
-            raise
-        if cb and (cb_count <= 1 or i > 0) and data_len > 0:
-            cb(data_len, cb_size)
-        for alg in digesters:
-            self.local_hashes[alg] = digesters[alg].digest()
-        if self.size is None and not torrent and "Range" not in headers:
-            self.size = data_len
-        self.close()
-        self.bucket.connection.debug = save_debug
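
A sketch of a download with a progress callback; the callback signature matches the `cb(data_len, cb_size)` calls above (names hypothetical):

    import boto

    def progress(transmitted, total):
        print('%d / %d bytes' % (transmitted, total))

    key = boto.connect_s3().get_bucket('example-bucket').get_key('big/object.bin')
    with open('/tmp/object.bin', 'wb') as fp:
        key.get_file(fp, cb=progress, num_cb=20)
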
-
-    def get_torrent_file(self, fp, headers=None, cb=None, num_cb=10):
-        """
-        Get a torrent file (see get_file)
-
-        :type fp: file
-        :param fp: The file pointer of where to put the torrent
-
-        :type headers: dict
-        :param headers: Headers to be passed
-
-        :type cb: function
-        :param cb: a callback function that will be called to report
-            progress on the download.  The callback should accept two
-            integer parameters, the first representing the number of
-            bytes that have been successfully transmitted from S3 and
-            the second representing the size of the to be transmitted
-            object.
-
-        :type num_cb: int
-        :param num_cb: (optional) If a callback is specified with the
-            cb parameter this parameter determines the granularity of
-            the callback by defining the maximum number of times the
-            callback will be called during the file transfer.
-
-        """
-        return self.get_file(fp, headers, cb, num_cb, torrent=True)
-
-    def get_contents_to_file(self, fp, headers=None,
-                             cb=None, num_cb=10,
-                             torrent=False,
-                             version_id=None,
-                             res_download_handler=None,
-                             response_headers=None):
-        """
-        Retrieve an object from S3 using the name of the Key object as the
-        key in S3.  Write the contents of the object to the file pointed
-        to by 'fp'.
-
-        :type fp: File-like object
-        :param fp: the file pointer to write the contents to
-
-        :type headers: dict
-        :param headers: additional HTTP headers that will be sent with
-            the GET request.
-
-        :type cb: function
-        :param cb: a callback function that will be called to report
-            progress on the download.  The callback should accept two
-            integer parameters, the first representing the number of
-            bytes that have been successfully transmitted from S3 and
-            the second representing the size of the to be transmitted
-            object.
-
-        :type num_cb: int
-        :param num_cb: (optional) If a callback is specified with the
-            cb parameter this parameter determines the granularity of
-            the callback by defining the maximum number of times the
-            callback will be called during the file transfer.
-
-        :type torrent: bool
-        :param torrent: If True, returns the contents of a torrent
-            file as a string.
-
-        :type res_download_handler: ResumableDownloadHandler
-        :param res_download_handler: If provided, this handler will
-            perform the download.
-
-        :type response_headers: dict
-        :param response_headers: A dictionary containing HTTP
-            headers/values that will override any headers associated
-            with the stored object in the response.  See
-            http://goo.gl/EWOPb for details.
-
-        :type version_id: str
-        :param version_id: The ID of a particular version of the object.
-            If this parameter is not supplied but the Key object has
-            a ``version_id`` attribute, that value will be used when
-            retrieving the object.  You can set the Key object's
-            ``version_id`` attribute to None to always grab the latest
-            version from a version-enabled bucket.
-        """
-        if self.bucket is not None:
-            if res_download_handler:
-                res_download_handler.get_file(self, fp, headers, cb, num_cb,
-                                              torrent=torrent,
-                                              version_id=version_id)
-            else:
-                self.get_file(fp, headers, cb, num_cb, torrent=torrent,
-                              version_id=version_id,
-                              response_headers=response_headers)
-
-    def get_contents_to_filename(self, filename, headers=None,
-                                 cb=None, num_cb=10,
-                                 torrent=False,
-                                 version_id=None,
-                                 res_download_handler=None,
-                                 response_headers=None):
-        """
-        Retrieve an object from S3 using the name of the Key object as the
-        key in S3.  Store contents of the object to a file named by
-        'filename'.  See get_contents_to_file method for details about the
-        parameters.
-
-        :type filename: string
-        :param filename: The filename of where to put the file contents
-
-        :type headers: dict
-        :param headers: Any additional headers to send in the request
-
-        :type cb: function
-        :param cb: a callback function that will be called to report
-            progress on the download.  The callback should accept two
-            integer parameters, the first representing the number of
-            bytes that have been successfully transmitted from S3 and
-            the second representing the total size of the object.
-
-        :type num_cb: int
-        :param num_cb: (optional) If a callback is specified with the
-            cb parameter this parameter determines the granularity of
-            the callback by defining the maximum number of times the
-            callback will be called during the file transfer.
-
-        :type torrent: bool
-        :param torrent: If True, returns the contents of a torrent file
-            as a string.
-
-        :type res_download_handler: ResumableDownloadHandler
-        :param res_download_handler: If provided, this handler will
-            perform the download.
-
-        :type response_headers: dict
-        :param response_headers: A dictionary containing HTTP
-            headers/values that will override any headers associated
-            with the stored object in the response.  See
-            http://goo.gl/EWOPb for details.
-
-        :type version_id: str
-        :param version_id: The ID of a particular version of the object.
-            If this parameter is not supplied but the Key object has
-            a ``version_id`` attribute, that value will be used when
-            retrieving the object.  You can set the Key object's
-            ``version_id`` attribute to None to always grab the latest
-            version from a version-enabled bucket.
-        """
-        try:
-            with open(filename, 'wb') as fp:
-                self.get_contents_to_file(fp, headers, cb, num_cb,
-                                          torrent=torrent,
-                                          version_id=version_id,
-                                          res_download_handler=res_download_handler,
-                                          response_headers=response_headers)
-        except Exception:
-            # Clean up any partially written file before re-raising; guard
-            # the removal in case the file was never created.
-            try:
-                os.remove(filename)
-            except OSError:
-                pass
-            raise
-        # if last_modified date was sent from s3, try to set file's timestamp
-        if self.last_modified is not None:
-            try:
-                modified_tuple = email.utils.parsedate_tz(self.last_modified)
-                modified_stamp = int(email.utils.mktime_tz(modified_tuple))
-                os.utime(filename, (modified_stamp, modified_stamp))
-            except Exception:
-                pass
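-
-    # Editor's note: a hypothetical sketch (not part of boto) of fetching a
-    # specific object version to a local file, per the version_id notes
-    # above; '<version-id>' is a placeholder, not a real version ID.
-    #
-    #     key.get_contents_to_filename('/tmp/report.csv',
-    #                                  version_id='<version-id>')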
-
-    def get_contents_as_string(self, headers=None,
-                               cb=None, num_cb=10,
-                               torrent=False,
-                               version_id=None,
-                               response_headers=None, encoding=None):
-        """
-        Retrieve an object from S3 using the name of the Key object as the
-        key in S3.  Return the contents of the object as a string.
-        See get_contents_to_file method for details about the
-        parameters.
-
-        :type headers: dict
-        :param headers: Any additional headers to send in the request
-
-        :type cb: function
-        :param cb: a callback function that will be called to report
-            progress on the download.  The callback should accept two
-            integer parameters, the first representing the number of
-            bytes that have been successfully transmitted from S3 and
-            the second representing the total size of the object.
-
-        :type num_cb: int
-        :param num_cb: (optional) If a callback is specified with the
-            cb parameter this parameter determines the granularity of
-            the callback by defining the maximum number of times the
-            callback will be called during the file transfer.
-
-        :type torrent: bool
-        :param torrent: If True, returns the contents of a torrent file
-            as a string.
-
-        :type response_headers: dict
-        :param response_headers: A dictionary containing HTTP
-            headers/values that will override any headers associated
-            with the stored object in the response.  See
-            http://goo.gl/EWOPb for details.
-
-        :type version_id: str
-        :param version_id: The ID of a particular version of the object.
-            If this parameter is not supplied but the Key object has
-            a ``version_id`` attribute, that value will be used when
-            retrieving the object.  You can set the Key object's
-            ``version_id`` attribute to None to always grab the latest
-            version from a version-enabled bucket.
-
-        :type encoding: str
-        :param encoding: The text encoding to use, such as ``utf-8``
-            or ``iso-8859-1``.  If set, then a string will be returned.
-            Defaults to ``None`` and returns bytes.
-
-        :rtype: bytes or str
-        :returns: The contents of the file as bytes or a string
-        """
-        fp = BytesIO()
-        self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent,
-                                  version_id=version_id,
-                                  response_headers=response_headers)
-        value = fp.getvalue()
-
-        if encoding is not None:
-            value = value.decode(encoding)
-
-        return value
-
-    def add_email_grant(self, permission, email_address, headers=None):
-        """
-        Convenience method that provides a quick way to add an email grant
-        to a key.  This method retrieves the current ACL, creates a new
-        grant based on the parameters passed in, adds that grant to the ACL
-        and then PUT's the new ACL back to S3.
-
-        :type permission: string
-        :param permission: The permission being granted.  Should be one of:
-            (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL).
-
-        :type email_address: string
-        :param email_address: The email address associated with the AWS
-            account you are granting the permission to.
-        """
-        policy = self.get_acl(headers=headers)
-        policy.acl.add_email_grant(permission, email_address)
-        self.set_acl(policy, headers=headers)
-
-    def add_user_grant(self, permission, user_id, headers=None,
-                       display_name=None):
-        """
-        Convenience method that provides a quick way to add a canonical
-        user grant to a key.  This method retrieves the current ACL,
-        creates a new grant based on the parameters passed in, adds that
-        grant to the ACL and then PUT's the new ACL back to S3.
-
-        :type permission: string
-        :param permission: The permission being granted.  Should be one of:
-            (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL).
-
-        :type user_id: string
-        :param user_id: The canonical user id associated with the AWS
-            account you are granting the permission to.
-
-        :type display_name: string
-        :param display_name: An optional string containing the user's
-            Display Name.  Only required on Walrus.
-        """
-        policy = self.get_acl(headers=headers)
-        policy.acl.add_user_grant(permission, user_id,
-                                  display_name=display_name)
-        self.set_acl(policy, headers=headers)
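-
-    # Editor's note: a hedged usage sketch (not part of boto) of the
-    # get-modify-put ACL flow implemented by the two grant helpers above;
-    # the address and canonical user id are placeholders.
-    #
-    #     key.add_email_grant('READ', 'user@example.com')
-    #     key.add_user_grant('FULL_CONTROL', '<64-char-canonical-user-id>')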
-
-    def _normalize_metadata(self, metadata):
-        if isinstance(metadata, set):
-            norm_metadata = set()
-            for k in metadata:
-                norm_metadata.add(k.lower())
-        else:
-            norm_metadata = {}
-            for k in metadata:
-                norm_metadata[k.lower()] = metadata[k]
-        return norm_metadata
-
-    def _get_remote_metadata(self, headers=None):
-        """
-        Extracts metadata from existing URI into a dict, so we can
-        overwrite/delete from it to form the new set of metadata to apply
-        to a key.
-        """
-        metadata = {}
-        for underscore_name in self._underscore_base_user_settable_fields:
-            if hasattr(self, underscore_name):
-                value = getattr(self, underscore_name)
-                if value:
-                    # Generate the HTTP field name corresponding to the
-                    # "_"-named field.
-                    field_name = underscore_name.replace('_', '-')
-                    metadata[field_name.lower()] = value
-        # self.metadata contains custom metadata, which are all user-settable.
-        prefix = self.provider.metadata_prefix
-        for underscore_name in self.metadata:
-            field_name = underscore_name.replace('_', '-')
-            metadata['%s%s' % (prefix, field_name.lower())] = (
-                self.metadata[underscore_name])
-        return metadata
-
-    def set_remote_metadata(self, metadata_plus, metadata_minus, preserve_acl,
-                            headers=None):
-        metadata_plus = self._normalize_metadata(metadata_plus)
-        metadata_minus = self._normalize_metadata(metadata_minus)
-        metadata = self._get_remote_metadata()
-        metadata.update(metadata_plus)
-        for h in metadata_minus:
-            if h in metadata:
-                del metadata[h]
-        src_bucket = self.bucket
-        # Boto prepends the meta prefix when adding headers, so strip the
-        # prefix in metadata before sending it back in to the copy_key()
-        # call.
-        rewritten_metadata = {}
-        for h in metadata:
-            if (h.startswith('x-goog-meta-') or h.startswith('x-amz-meta-')):
-                rewritten_h = (h.replace('x-goog-meta-', '')
-                               .replace('x-amz-meta-', ''))
-            else:
-                rewritten_h = h
-            rewritten_metadata[rewritten_h] = metadata[h]
-        metadata = rewritten_metadata
-        src_bucket.copy_key(self.name, self.bucket.name, self.name,
-                            metadata=metadata, preserve_acl=preserve_acl,
-                            headers=headers)
-
-    def restore(self, days, headers=None):
-        """Restore an object from an archive.
-
-        :type days: int
-        :param days: The lifetime of the restored object (must
-            be at least 1 day).  If the object is already restored
-            then this parameter can be used to readjust the lifetime
-            of the restored object.  In this case, the days
-            param is with respect to the initial time of the request.
-            If the object has not been restored, this param is with
-            respect to the completion time of the request.
-
-        """
-        response = self.bucket.connection.make_request(
-            'POST', self.bucket.name, self.name,
-            data=self.RestoreBody % days,
-            headers=headers, query_args='restore')
-        if response.status not in (200, 202):
-            provider = self.bucket.connection.provider
-            raise provider.storage_response_error(response.status,
-                                                  response.reason,
-                                                  response.read())
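-
-    # Editor's note: a hypothetical sketch (not part of boto) of restoring
-    # an archived (e.g. GLACIER-class) object for two days via restore()
-    # above, then re-fetching the key to inspect the restore status.
-    #
-    #     key.restore(days=2)
-    #     key = key.bucket.get_key(key.name)  # refresh restore metadata
-    #     print(key.ongoing_restore, key.expiry_date)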