Mercurial > repos > guerler > springsuite
diff planemo/lib/python3.7/site-packages/boto/glacier/utils.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:18:57 -0400 |
parents | |
children |
line wrap: on
line diff
# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import hashlib
import math
import binascii

from boto.compat import six


_MEGABYTE = 1024 * 1024
DEFAULT_PART_SIZE = 4 * _MEGABYTE
MAXIMUM_NUMBER_OF_PARTS = 10000


def minimum_part_size(size_in_bytes, default_part_size=DEFAULT_PART_SIZE):
    """Calculate the minimum part size needed for a multipart upload.

    Glacier allows a maximum of 10,000 parts per upload.  It also
    states that the maximum archive size is 10,000 * 4 GB, which means
    the part size can range from 1MB to 4GB (provided it is 1MB
    multiplied by a power of 2).

    This function will compute what the minimum part size must be in
    order to upload a file of size ``size_in_bytes``.

    It will first check if ``default_part_size`` is sufficient for
    a part size given the ``size_in_bytes``.  If this is not the case,
    then the smallest part size that can accommodate a file of size
    ``size_in_bytes`` will be returned.

    :param size_in_bytes: Size of the archive to upload, in bytes.

    :param default_part_size: Part size to use whenever it is large
        enough to fit the archive into ``MAXIMUM_NUMBER_OF_PARTS`` parts.

    :rtype: int
    :return: ``default_part_size`` if it is sufficient, otherwise the
        smallest ``1MB * 2**k`` for which ``MAXIMUM_NUMBER_OF_PARTS``
        parts cover ``size_in_bytes``.

    :raises ValueError: If the file size is greater than the maximum
        allowed archive size of 10,000 * 4GB.

    """
    # The default part size (4 MB) will be too small for a very large
    # archive, as there is a limit of 10,000 parts in a multipart upload.
    # This puts the maximum allowed archive size with the default part size
    # at 40,000 MB.  Only search for a bigger part size when the default
    # one cannot cover the archive.
    if default_part_size * MAXIMUM_NUMBER_OF_PARTS >= size_in_bytes:
        return default_part_size

    max_archive_size = 4096 * _MEGABYTE * MAXIMUM_NUMBER_OF_PARTS
    if size_in_bytes > max_archive_size:
        raise ValueError("File size too large: %s" % size_in_bytes)

    # Double the part size, starting from 1 MB, until the maximum number of
    # parts covers the archive.  Starting at 1 MB (rather than jumping
    # straight to 8 MB) keeps the result minimal even when the caller passes
    # a ``default_part_size`` below 4 MB.  Integer arithmetic avoids any
    # float rounding; the size check above bounds the loop at 4096 MB.
    part_size = _MEGABYTE
    while part_size * MAXIMUM_NUMBER_OF_PARTS < size_in_bytes:
        part_size *= 2
    return part_size


def chunk_hashes(bytestring, chunk_size=_MEGABYTE):
    """Return the SHA-256 digest of each ``chunk_size`` slice of a bytestring.

    :param bytestring: The bytes to split into chunks and hash.

    :param chunk_size: Number of bytes per chunk; the final chunk may
        be shorter.

    :rtype: list
    :return: One raw (binary) SHA-256 digest per chunk, in order.  An
        empty ``bytestring`` yields a single digest of the empty string.

    """
    hashes = [
        hashlib.sha256(bytestring[start:start + chunk_size]).digest()
        for start in range(0, len(bytestring), chunk_size)
    ]
    # Empty input still has a well-defined hash: that of the empty payload.
    return hashes or [hashlib.sha256(b'').digest()]


def tree_hash(fo):
    """Combine per-chunk digests into a single tree hash.

    Given a hash of each 1MB chunk (from chunk_hashes) this will hash
    together adjacent hashes until it ends up with one big one.  So a
    tree of hashes.

    :param fo: An iterable of raw (binary) digests.  It is copied, so
        the caller's sequence is not modified.

    :return: The raw (binary) digest at the root of the tree.

    :raises IndexError: If ``fo`` contains no digests.

    """
    hashes = list(fo)
    while len(hashes) > 1:
        # Hash neighbours pairwise; indexing (instead of repeated
        # ``pop(0)``) keeps each level linear in the number of digests.
        next_level = [
            hashlib.sha256(hashes[i] + hashes[i + 1]).digest()
            for i in range(0, len(hashes) - 1, 2)
        ]
        if len(hashes) % 2:
            # An unpaired trailing digest is promoted to the next level
            # unchanged.
            next_level.append(hashes[-1])
        hashes = next_level
    return hashes[0]


def compute_hashes_from_fileobj(fileobj, chunk_size=_MEGABYTE):
    """Compute the linear and tree hash from a fileobj.

    This function will compute the linear/tree hash of a fileobj
    in a single pass through the fileobj.

    :param fileobj: A file like object.

    :param chunk_size: The size of the chunks to use for the tree
        hash.  This is also the buffer size used to read from
        `fileobj`.

    :rtype: tuple
    :return: A tuple of (linear_hash, tree_hash).  Both hashes
        are returned in hex: the linear hash comes from ``hexdigest()``
        and the tree hash from ``bytes_to_hex``.

    :raises ValueError: On Python 3, if ``fileobj`` exposes a ``mode``
        attribute that does not contain ``'b'``.

    """
    # Python 3+, not binary
    if six.PY3 and hasattr(fileobj, 'mode') and 'b' not in fileobj.mode:
        raise ValueError('File-like object must be opened in binary mode!')

    linear_hash = hashlib.sha256()
    chunks = []
    chunk = fileobj.read(chunk_size)
    while chunk:
        # It's possible to get a file-like object that has no mode (checked
        # above) and returns something other than bytes (e.g. str).  So here
        # we try to catch that and encode to bytes.
        if not isinstance(chunk, bytes):
            chunk = chunk.encode(getattr(fileobj, 'encoding', '') or 'utf-8')
        linear_hash.update(chunk)
        chunks.append(hashlib.sha256(chunk).digest())
        chunk = fileobj.read(chunk_size)
    if not chunks:
        # Nothing was read: use the digest of the empty payload so that
        # tree_hash() still has one leaf to work with.
        chunks = [hashlib.sha256(b'').digest()]
    return linear_hash.hexdigest(), bytes_to_hex(tree_hash(chunks))


def bytes_to_hex(str_as_bytes):
    """Return the hexadecimal representation of a binary string.

    This is a thin wrapper around ``binascii.hexlify``, so the result has
    whatever type ``hexlify`` returns (a bytes object on Python 3).

    :param str_as_bytes: The binary data to convert.

    :return: The hex-encoded data.

    """
    return binascii.hexlify(str_as_bytes)


def tree_hash_from_str(str_as_bytes):
    """Compute the hex-encoded tree hash of an in-memory bytestring.

    :type str_as_bytes: bytes
    :param str_as_bytes: The bytestring for which to compute the tree
        hash.  It is split into 1MB chunks by ``chunk_hashes``.

    :return: The computed tree hash, hex-encoded by ``bytes_to_hex``.

    """
    return bytes_to_hex(tree_hash(chunk_hashes(str_as_bytes)))


class ResettingFileSender(object):
    """Callable that sends an archive file object as a request body.

    The position of ``archive`` at construction time is remembered, and
    the file is sought back to it after every call, whether or not the
    request succeeded.
    NOTE(review): presumably this lets a retried request re-send the same
    data from the same offset -- confirm against the caller.
    """

    def __init__(self, archive):
        """Remember ``archive`` and its current offset.

        :param archive: A seekable file-like object supporting ``tell()``
            and ``seek()``.
        """
        self._archive = archive
        self._starting_offset = archive.tell()

    def __call__(self, connection, method, path, body, headers):
        """Issue the request using the archive as the body.

        :param connection: Object providing ``request()`` and
            ``getresponse()``.
        :param method: Passed through to ``connection.request``.
        :param path: Passed through to ``connection.request``.
        :param body: Ignored; the archive file object is sent instead.
        :param headers: Passed through to ``connection.request``.

        :return: The result of ``connection.getresponse()``.
        """
        try:
            connection.request(method, path, self._archive, headers)
            return connection.getresponse()
        finally:
            # Always rewind so the archive can be read again from the same
            # starting point.
            self._archive.seek(self._starting_offset)