diff env/lib/python3.7/site-packages/boto/glacier/vault.py @ 2:6af9afd405e9 draft
"planemo upload commit 0a63dd5f4d38a1f6944587f52a8cd79874177fc1"
author | shellac
---|---
date | Thu, 14 May 2020 14:56:58 -0400
parents | 26e78fe6e8c4
children |
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/env/lib/python3.7/site-packages/boto/glacier/vault.py Thu May 14 14:56:58 2020 -0400
@@ -0,0 +1,450 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/
+# Copyright (c) 2012 Robie Basak <robie@justgohome.co.uk>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish, dis-
+# tribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the fol-
+# lowing conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+import codecs
+from boto.glacier.exceptions import UploadArchiveError
+from boto.glacier.job import Job
+from boto.glacier.writer import compute_hashes_from_fileobj, \
+    resume_file_upload, Writer
+from boto.glacier.concurrent import ConcurrentUploader
+from boto.glacier.utils import minimum_part_size, DEFAULT_PART_SIZE
+import os.path
+
+
+_MEGABYTE = 1024 * 1024
+_GIGABYTE = 1024 * _MEGABYTE
+
+MAXIMUM_ARCHIVE_SIZE = 10000 * 4 * _GIGABYTE
+MAXIMUM_NUMBER_OF_PARTS = 10000
+
+
+class Vault(object):
+
+    DefaultPartSize = DEFAULT_PART_SIZE
+    SingleOperationThreshold = 100 * _MEGABYTE
+
+    ResponseDataElements = (('VaultName', 'name', None),
+                            ('VaultARN', 'arn', None),
+                            ('CreationDate', 'creation_date', None),
+                            ('LastInventoryDate', 'last_inventory_date', None),
+                            ('SizeInBytes', 'size', 0),
+                            ('NumberOfArchives', 'number_of_archives', 0))
+
+    def __init__(self, layer1, response_data=None):
+        self.layer1 = layer1
+        if response_data:
+            for response_name, attr_name, default in self.ResponseDataElements:
+                value = response_data[response_name]
+                setattr(self, attr_name, value)
+        else:
+            for response_name, attr_name, default in self.ResponseDataElements:
+                setattr(self, attr_name, default)
+
+    def __repr__(self):
+        return 'Vault("%s")' % self.arn
+
+    def delete(self):
+        """
+        Delete's this vault. WARNING!
+        """
+        self.layer1.delete_vault(self.name)
+
+    def upload_archive(self, filename, description=None):
+        """
+        Adds an archive to a vault. For archives greater than 100MB the
+        multipart upload will be used.
+
+        :type file: str
+        :param file: A filename to upload
+
+        :type description: str
+        :param description: An optional description for the archive.
+
+        :rtype: str
+        :return: The archive id of the newly created archive
+        """
+        if os.path.getsize(filename) > self.SingleOperationThreshold:
+            return self.create_archive_from_file(filename, description=description)
+        return self._upload_archive_single_operation(filename, description)
+
+    def _upload_archive_single_operation(self, filename, description):
+        """
+        Adds an archive to a vault in a single operation. It's recommended for
+        archives less than 100MB
+
+        :type file: str
+        :param file: A filename to upload
+
+        :type description: str
+        :param description: A description for the archive.
+
+        :rtype: str
+        :return: The archive id of the newly created archive
+        """
+        with open(filename, 'rb') as fileobj:
+            linear_hash, tree_hash = compute_hashes_from_fileobj(fileobj)
+            fileobj.seek(0)
+            response = self.layer1.upload_archive(self.name, fileobj,
+                                                  linear_hash, tree_hash,
+                                                  description)
+        return response['ArchiveId']
+
+    def create_archive_writer(self, part_size=DefaultPartSize,
+                              description=None):
+        """
+        Create a new archive and begin a multi-part upload to it.
+        Returns a file-like object to which the data for the archive
+        can be written. Once all the data is written the file-like
+        object should be closed, you can then call the get_archive_id
+        method on it to get the ID of the created archive.
+
+        :type part_size: int
+        :param part_size: The part size for the multipart upload.
+
+        :type description: str
+        :param description: An optional description for the archive.
+
+        :rtype: :class:`boto.glacier.writer.Writer`
+        :return: A Writer object that to which the archive data
+            should be written.
+        """
+        response = self.layer1.initiate_multipart_upload(self.name,
+                                                         part_size,
+                                                         description)
+        return Writer(self, response['UploadId'], part_size=part_size)
+
+    def create_archive_from_file(self, filename=None, file_obj=None,
+                                 description=None, upload_id_callback=None):
+        """
+        Create a new archive and upload the data from the given file
+        or file-like object.
+
+        :type filename: str
+        :param filename: A filename to upload
+
+        :type file_obj: file
+        :param file_obj: A file-like object to upload
+
+        :type description: str
+        :param description: An optional description for the archive.
+
+        :type upload_id_callback: function
+        :param upload_id_callback: if set, call with the upload_id as the
+            only parameter when it becomes known, to enable future calls
+            to resume_archive_from_file in case resume is needed.
+
+        :rtype: str
+        :return: The archive id of the newly created archive
+        """
+        part_size = self.DefaultPartSize
+        if not file_obj:
+            file_size = os.path.getsize(filename)
+            try:
+                part_size = minimum_part_size(file_size, part_size)
+            except ValueError:
+                raise UploadArchiveError("File size of %s bytes exceeds "
+                                         "40,000 GB archive limit of Glacier.")
+            file_obj = open(filename, "rb")
+        writer = self.create_archive_writer(
+            description=description,
+            part_size=part_size)
+        if upload_id_callback:
+            upload_id_callback(writer.upload_id)
+        while True:
+            data = file_obj.read(part_size)
+            if not data:
+                break
+            writer.write(data)
+        writer.close()
+        return writer.get_archive_id()
+
+    @staticmethod
+    def _range_string_to_part_index(range_string, part_size):
+        start, inside_end = [int(value) for value in range_string.split('-')]
+        end = inside_end + 1
+        length = end - start
+        if length == part_size + 1:
+            # Off-by-one bug in Amazon's Glacier implementation,
+            # see: https://forums.aws.amazon.com/thread.jspa?threadID=106866
+            # Workaround: since part_size is too big by one byte, adjust it
+            end -= 1
+            inside_end -= 1
+            length -= 1
+        assert not (start % part_size), (
+            "upload part start byte is not on a part boundary")
+        assert (length <= part_size), "upload part is bigger than part size"
+        return start // part_size
+
+    def resume_archive_from_file(self, upload_id, filename=None,
+                                 file_obj=None):
+        """Resume upload of a file already part-uploaded to Glacier.
+
+        The resumption of an upload where the part-uploaded section is empty
+        is a valid degenerate case that this function can handle.
+
+        One and only one of filename or file_obj must be specified.
+
+        :type upload_id: str
+        :param upload_id: existing Glacier upload id of upload being resumed.
+
+        :type filename: str
+        :param filename: file to open for resume
+
+        :type fobj: file
+        :param fobj: file-like object containing local data to resume. This
+            must read from the start of the entire upload, not just from the
+            point being resumed. Use fobj.seek(0) to achieve this if necessary.
+
+        :rtype: str
+        :return: The archive id of the newly created archive
+
+        """
+        part_list_response = self.list_all_parts(upload_id)
+        part_size = part_list_response['PartSizeInBytes']
+
+        part_hash_map = {}
+        for part_desc in part_list_response['Parts']:
+            part_index = self._range_string_to_part_index(
+                part_desc['RangeInBytes'], part_size)
+            part_tree_hash = codecs.decode(part_desc['SHA256TreeHash'], 'hex_codec')
+            part_hash_map[part_index] = part_tree_hash
+
+        if not file_obj:
+            file_obj = open(filename, "rb")
+
+        return resume_file_upload(
+            self, upload_id, part_size, file_obj, part_hash_map)
+
+    def concurrent_create_archive_from_file(self, filename, description,
+                                            **kwargs):
+        """
+        Create a new archive from a file and upload the given
+        file.
+
+        This is a convenience method around the
+        :class:`boto.glacier.concurrent.ConcurrentUploader`
+        class. This method will perform a multipart upload
+        and upload the parts of the file concurrently.
+
+        :type filename: str
+        :param filename: A filename to upload
+
+        :param kwargs: Additional kwargs to pass through to
+            :py:class:`boto.glacier.concurrent.ConcurrentUploader`.
+            You can pass any argument besides the ``api`` and
+            ``vault_name`` param (these arguments are already
+            passed to the ``ConcurrentUploader`` for you).
+
+        :raises: `boto.glacier.exception.UploadArchiveError` is an error
+            occurs during the upload process.
+
+        :rtype: str
+        :return: The archive id of the newly created archive
+
+        """
+        uploader = ConcurrentUploader(self.layer1, self.name, **kwargs)
+        archive_id = uploader.upload(filename, description)
+        return archive_id
+
+    def retrieve_archive(self, archive_id, sns_topic=None,
+                         description=None):
+        """
+        Initiate a archive retrieval job to download the data from an
+        archive. You will need to wait for the notification from
+        Amazon (via SNS) before you can actually download the data,
+        this takes around 4 hours.
+
+        :type archive_id: str
+        :param archive_id: The id of the archive
+
+        :type description: str
+        :param description: An optional description for the job.
+
+        :type sns_topic: str
+        :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
+            sends notification when the job is completed and the output
+            is ready for you to download.
+
+        :rtype: :class:`boto.glacier.job.Job`
+        :return: A Job object representing the retrieval job.
+        """
+        job_data = {'Type': 'archive-retrieval',
+                    'ArchiveId': archive_id}
+        if sns_topic is not None:
+            job_data['SNSTopic'] = sns_topic
+        if description is not None:
+            job_data['Description'] = description
+
+        response = self.layer1.initiate_job(self.name, job_data)
+        return self.get_job(response['JobId'])
+
+    def retrieve_inventory(self, sns_topic=None,
+                           description=None, byte_range=None,
+                           start_date=None, end_date=None,
+                           limit=None):
+        """
+        Initiate a inventory retrieval job to list the items in the
+        vault. You will need to wait for the notification from
+        Amazon (via SNS) before you can actually download the data,
+        this takes around 4 hours.
+
+        :type description: str
+        :param description: An optional description for the job.
+
+        :type sns_topic: str
+        :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
+            sends notification when the job is completed and the output
+            is ready for you to download.
+
+        :type byte_range: str
+        :param byte_range: Range of bytes to retrieve.
+
+        :type start_date: DateTime
+        :param start_date: Beginning of the date range to query.
+
+        :type end_date: DateTime
+        :param end_date: End of the date range to query.
+
+        :type limit: int
+        :param limit: Limits the number of results returned.
+
+        :rtype: str
+        :return: The ID of the job
+        """
+        job_data = {'Type': 'inventory-retrieval'}
+        if sns_topic is not None:
+            job_data['SNSTopic'] = sns_topic
+        if description is not None:
+            job_data['Description'] = description
+        if byte_range is not None:
+            job_data['RetrievalByteRange'] = byte_range
+        if start_date is not None or end_date is not None or limit is not None:
+            rparams = {}
+
+            if start_date is not None:
+                rparams['StartDate'] = start_date.strftime('%Y-%m-%dT%H:%M:%S%Z')
+            if end_date is not None:
+                rparams['EndDate'] = end_date.strftime('%Y-%m-%dT%H:%M:%S%Z')
+            if limit is not None:
+                rparams['Limit'] = limit
+
+            job_data['InventoryRetrievalParameters'] = rparams
+
+        response = self.layer1.initiate_job(self.name, job_data)
+        return response['JobId']
+
+    def retrieve_inventory_job(self, **kwargs):
+        """
+        Identical to ``retrieve_inventory``, but returns a ``Job`` instance
+        instead of just the job ID.
+
+        :type description: str
+        :param description: An optional description for the job.
+
+        :type sns_topic: str
+        :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
+            sends notification when the job is completed and the output
+            is ready for you to download.
+
+        :type byte_range: str
+        :param byte_range: Range of bytes to retrieve.
+
+        :type start_date: DateTime
+        :param start_date: Beginning of the date range to query.
+
+        :type end_date: DateTime
+        :param end_date: End of the date range to query.
+
+        :type limit: int
+        :param limit: Limits the number of results returned.
+
+        :rtype: :class:`boto.glacier.job.Job`
+        :return: A Job object representing the retrieval job.
+        """
+        job_id = self.retrieve_inventory(**kwargs)
+        return self.get_job(job_id)
+
+    def delete_archive(self, archive_id):
+        """
+        This operation deletes an archive from the vault.
+
+        :type archive_id: str
+        :param archive_id: The ID for the archive to be deleted.
+        """
+        return self.layer1.delete_archive(self.name, archive_id)
+
+    def get_job(self, job_id):
+        """
+        Get an object representing a job in progress.
+
+        :type job_id: str
+        :param job_id: The ID of the job
+
+        :rtype: :class:`boto.glacier.job.Job`
+        :return: A Job object representing the job.
+        """
+        response_data = self.layer1.describe_job(self.name, job_id)
+        return Job(self, response_data)
+
+    def list_jobs(self, completed=None, status_code=None):
+        """
+        Return a list of Job objects related to this vault.
+
+        :type completed: boolean
+        :param completed: Specifies the state of the jobs to return.
+            If a value of True is passed, only completed jobs will
+            be returned. If a value of False is passed, only
+            uncompleted jobs will be returned. If no value is
+            passed, all jobs will be returned.
+
+        :type status_code: string
+        :param status_code: Specifies the type of job status to return.
+            Valid values are: InProgress|Succeeded|Failed. If not
+            specified, jobs with all status codes are returned.
+
+        :rtype: list of :class:`boto.glacier.job.Job`
+        :return: A list of Job objects related to this vault.
+        """
+        response_data = self.layer1.list_jobs(self.name, completed,
+                                              status_code)
+        return [Job(self, jd) for jd in response_data['JobList']]
+
+    def list_all_parts(self, upload_id):
+        """Automatically make and combine multiple calls to list_parts.
+
+        Call list_parts as necessary, combining the results in case multiple
+        calls were required to get data on all available parts.
+
+        """
+        result = self.layer1.list_parts(self.name, upload_id)
+        marker = result['Marker']
+        while marker:
+            additional_result = self.layer1.list_parts(
+                self.name, upload_id, marker=marker)
+            result['Parts'].extend(additional_result['Parts'])
+            marker = additional_result['Marker']
+        # The marker makes no sense in an unpaginated result, and clearing it
+        # makes testing easier. This also has the nice property that the result
+        # is a normal (but expanded) response.
+        result['Marker'] = None
+        return result
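The docstrings above describe an asynchronous upload/retrieve workflow, but the changeset itself carries no usage example. Below is a minimal sketch of how this `Vault` class is typically driven through boto's Layer2 interface; the `connect_to_region` call, region, vault name, and file paths are illustrative assumptions and are not part of this commit.

```python
# Illustrative sketch only -- assumes the boto 2.x Layer2 API
# (boto.glacier.connect_to_region / get_vault); the region, vault name
# and file paths are placeholders.
import boto.glacier

layer2 = boto.glacier.connect_to_region('us-east-1')  # credentials from env/boto config
vault = layer2.get_vault('my-backups')                # returns a Vault as defined above

# Files under 100MB go up in a single request; larger files take the
# multipart path -- upload_archive() chooses based on SingleOperationThreshold.
archive_id = vault.upload_archive('backup.tar.gz', description='nightly backup')

# Retrieval is asynchronous: initiate a job, then wait for the SNS
# notification (or poll) before downloading the output.
job = vault.retrieve_archive(archive_id)
print(job.id)

# Later, once the job has completed (typically several hours):
# job = vault.get_job(job.id)
# if job.completed:
#     job.download_to_file('restored-backup.tar.gz')
```

The two-step retrieval mirrors the `retrieve_archive` docstring: Glacier jobs complete hours after initiation, so the archive ID alone is never enough to read data back immediately.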
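For the multipart path specifically, the `upload_id_callback` parameter of `create_archive_from_file` pairs with `resume_archive_from_file`. A hedged sketch of that pairing, using only methods defined in this file plus the same assumed connection as above (the broad `except` and file names are placeholders):

```python
# Record the multipart upload id as soon as it is known, so an interrupted
# transfer can be resumed from the parts Glacier already holds.
upload_ids = []

try:
    archive_id = vault.create_archive_from_file(
        'large-backup.tar.gz',
        description='large nightly backup',
        upload_id_callback=upload_ids.append)
except Exception:
    if upload_ids:
        # list_all_parts() is called internally to discover which parts
        # were already uploaded; only the remainder is re-sent.
        archive_id = vault.resume_archive_from_file(
            upload_ids[0], filename='large-backup.tar.gz')
    else:
        raise
```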