guppy_basecaller: env/lib/python3.7/site-packages/boto/glacier/utils.py @ 0:26e78fe6e8c4 (draft)
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author: shellac
date:   Sat, 02 May 2020 07:14:21 -0400

# Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import hashlib
import math
import binascii

from boto.compat import six


_MEGABYTE = 1024 * 1024
DEFAULT_PART_SIZE = 4 * _MEGABYTE
MAXIMUM_NUMBER_OF_PARTS = 10000


def minimum_part_size(size_in_bytes, default_part_size=DEFAULT_PART_SIZE):
    """Calculate the minimum part size needed for a multipart upload.

    Glacier allows a maximum of 10,000 parts per upload. It also
    states that the maximum archive size is 10,000 * 4 GB, which means
    the part size can range from 1MB to 4GB (provided it is 1MB
    multiplied by a power of 2).

    This function will compute what the minimum part size must be in
    order to upload a file of size ``size_in_bytes``.

    It will first check if ``default_part_size`` is sufficient for
    a part size given the ``size_in_bytes``. If this is not the case,
    then the smallest part size that can accommodate a file of size
    ``size_in_bytes`` will be returned.

    If the file size is greater than the maximum allowed archive
    size of 10,000 * 4GB, a ``ValueError`` will be raised.

    """
    # The default part size (4 MB) will be too small for a very large
    # archive, as there is a limit of 10,000 parts in a multipart upload.
    # This puts the maximum allowed archive size with the default part size
    # at 40,000 MB. We need to do a sanity check on the part size, and find
    # one that works if the default is too small.
    part_size = _MEGABYTE
    if (default_part_size * MAXIMUM_NUMBER_OF_PARTS) < size_in_bytes:
        if size_in_bytes > (4096 * _MEGABYTE * 10000):
            raise ValueError("File size too large: %s" % size_in_bytes)
        min_part_size = size_in_bytes / 10000
        power = 3
        while part_size < min_part_size:
            part_size = math.ldexp(_MEGABYTE, power)
            power += 1
        part_size = int(part_size)
    else:
        part_size = default_part_size
    return part_size

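
# Illustrative usage sketch (not part of boto): a 10 GB archive fits in
# 10,000 parts at the 4 MB default, while a 100 GB archive needs
# ~10.24 MB parts, so the next 1 MB-power-of-two size (16 MB) is chosen.
def _example_minimum_part_size():
    assert minimum_part_size(10 * 1024 * _MEGABYTE) == DEFAULT_PART_SIZE
    assert minimum_part_size(100 * 1024 * _MEGABYTE) == 16 * _MEGABYTE
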

def chunk_hashes(bytestring, chunk_size=_MEGABYTE):
    chunk_count = int(math.ceil(len(bytestring) / float(chunk_size)))
    hashes = []
    for i in range(chunk_count):
        start = i * chunk_size
        end = (i + 1) * chunk_size
        hashes.append(hashlib.sha256(bytestring[start:end]).digest())
    if not hashes:
        return [hashlib.sha256(b'').digest()]
    return hashes

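
# Illustrative sketch (not part of boto): an empty payload still yields
# one leaf digest (the SHA-256 of b''), and a 2.5 MB payload yields
# three leaves, the last covering only the 0.5 MB tail.
def _example_chunk_hashes():
    assert len(chunk_hashes(b'')) == 1
    assert len(chunk_hashes(b'\x00' * (5 * _MEGABYTE // 2))) == 3
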

def tree_hash(fo):
    """
    Given a hash of each 1MB chunk (from chunk_hashes) this will hash
    together adjacent hashes until it ends up with one big one. So a
    tree of hashes.
    """
    hashes = []
    hashes.extend(fo)
    while len(hashes) > 1:
        new_hashes = []
        while True:
            if len(hashes) > 1:
                first = hashes.pop(0)
                second = hashes.pop(0)
                new_hashes.append(hashlib.sha256(first + second).digest())
            elif len(hashes) == 1:
                only = hashes.pop(0)
                new_hashes.append(only)
            else:
                break
        hashes.extend(new_hashes)
    return hashes[0]

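
# Illustrative sketch (not part of boto): for a 3 MB payload the
# reduction above pairs the first two leaf digests and carries the odd
# one up unchanged, then hashes the two results together for the root.
def _example_tree_hash():
    leaves = chunk_hashes(b'\x00' * (3 * _MEGABYTE))
    paired = hashlib.sha256(leaves[0] + leaves[1]).digest()
    root = hashlib.sha256(paired + leaves[2]).digest()
    assert tree_hash(leaves) == root
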

def compute_hashes_from_fileobj(fileobj, chunk_size=1024 * 1024):
    """Compute the linear and tree hash from a fileobj.

    This function will compute the linear/tree hash of a fileobj
    in a single pass through the fileobj.

    :param fileobj: A file like object.

    :param chunk_size: The size of the chunks to use for the tree
        hash. This is also the buffer size used to read from
        `fileobj`.

    :rtype: tuple
    :return: A tuple of (linear_hash, tree_hash). Both hashes
        are returned in hex.

    """
    # On Python 3, reject file objects opened in text mode; the hash
    # functions below need bytes.
    if six.PY3 and hasattr(fileobj, 'mode') and 'b' not in fileobj.mode:
        raise ValueError('File-like object must be opened in binary mode!')

    linear_hash = hashlib.sha256()
    chunks = []
    chunk = fileobj.read(chunk_size)
    while chunk:
        # It's possible to get a file-like object that has no mode (checked
        # above) and returns something other than bytes (e.g. str). So here
        # we try to catch that and encode to bytes.
        if not isinstance(chunk, bytes):
            chunk = chunk.encode(getattr(fileobj, 'encoding', '') or 'utf-8')
        linear_hash.update(chunk)
        chunks.append(hashlib.sha256(chunk).digest())
        chunk = fileobj.read(chunk_size)
    if not chunks:
        chunks = [hashlib.sha256(b'').digest()]
    return linear_hash.hexdigest(), bytes_to_hex(tree_hash(chunks))

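
# Illustrative sketch (not part of boto): hashing an in-memory stream.
# Note the asymmetry in the return types on Python 3: the linear hash
# is a str from hexdigest(), while the tree hash is the bytes object
# that binascii.hexlify() returns.
def _example_compute_hashes():
    import io
    linear, tree = compute_hashes_from_fileobj(
        io.BytesIO(b'hello glacier' * 1000))
    assert len(linear) == 64 and len(tree) == 64
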

def bytes_to_hex(str_as_bytes):
    return binascii.hexlify(str_as_bytes)


def tree_hash_from_str(str_as_bytes):
    """

    :type str_as_bytes: str
    :param str_as_bytes: The string for which to compute the tree hash.

    :rtype: str
    :return: The computed tree hash, returned as hex.

    """
    return bytes_to_hex(tree_hash(chunk_hashes(str_as_bytes)))

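
# Illustrative sketch (not part of boto): for data already in memory,
# the one-liner above (which must be given bytes on Python 3, despite
# the ``str`` in the docstring) matches the tree hash that
# compute_hashes_from_fileobj derives from the equivalent stream.
def _example_tree_hash_from_str():
    import io
    data = b'x' * (5 * _MEGABYTE)
    _, tree = compute_hashes_from_fileobj(io.BytesIO(data))
    assert tree_hash_from_str(data) == tree
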

class ResettingFileSender(object):
    def __init__(self, archive):
        self._archive = archive
        self._starting_offset = archive.tell()

    def __call__(self, connection, method, path, body, headers):
        try:
            connection.request(method, path, self._archive, headers)
            return connection.getresponse()
        finally:
            self._archive.seek(self._starting_offset)
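
# Illustrative sketch (not part of boto): the sender restores the
# archive's file position after every attempt, so the same callable can
# be handed to retry logic and re-sent. The Python 3 http.client usage,
# host, vault name and headers below are hypothetical placeholders.
def _example_resetting_file_sender(archive):
    import http.client
    sender = ResettingFileSender(archive)
    conn = http.client.HTTPSConnection('glacier.us-east-1.amazonaws.com')
    # Whether the request succeeds or raises, archive.tell() is back at
    # the offset captured when the sender was constructed.
    return sender(conn, 'POST', '/-/vaults/examplevault/archives', None, {})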