comparison: planemo/lib/python3.7/site-packages/boto/glacier/utils.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"

| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:18:57 -0400 |
| parents | |
| children | |
| previous revision | this revision |
|---|---|
| -1:000000000000 | 0:d30785e31577 |

```python
# Copyright (c) 2012 Amazon.com, Inc. or its affiliates.  All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import hashlib
import math
import binascii

from boto.compat import six


_MEGABYTE = 1024 * 1024
DEFAULT_PART_SIZE = 4 * _MEGABYTE
MAXIMUM_NUMBER_OF_PARTS = 10000
```

```python
def minimum_part_size(size_in_bytes, default_part_size=DEFAULT_PART_SIZE):
    """Calculate the minimum part size needed for a multipart upload.

    Glacier allows a maximum of 10,000 parts per upload.  It also
    states that the maximum archive size is 10,000 * 4 GB, which means
    the part size can range from 1MB to 4GB (provided it is 1MB
    multiplied by a power of 2).

    This function will compute what the minimum part size must be in
    order to upload a file of size ``size_in_bytes``.

    It will first check if ``default_part_size`` is sufficient for
    a part size given the ``size_in_bytes``.  If this is not the case,
    then the smallest part size that can accommodate a file of size
    ``size_in_bytes`` will be returned.

    If the file size is greater than the maximum allowed archive
    size of 10,000 * 4GB, a ``ValueError`` will be raised.

    """
    # The default part size (4 MB) will be too small for a very large
    # archive, as there is a limit of 10,000 parts in a multipart upload.
    # This puts the maximum allowed archive size with the default part size
    # at 40,000 MB.  We need to do a sanity check on the part size, and find
    # one that works if the default is too small.
    part_size = _MEGABYTE
    if (default_part_size * MAXIMUM_NUMBER_OF_PARTS) < size_in_bytes:
        if size_in_bytes > (4096 * _MEGABYTE * 10000):
            raise ValueError("File size too large: %s" % size_in_bytes)
        min_part_size = size_in_bytes / 10000
        power = 3
        while part_size < min_part_size:
            part_size = math.ldexp(_MEGABYTE, power)
            power += 1
        part_size = int(part_size)
    else:
        part_size = default_part_size
    return part_size
```

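As a quick illustration of the size search above (not part of the module): with the 4 MB default, any archive larger than 40,000 MB forces a larger power-of-two part size. The 50 GiB figure below is an arbitrary example value.

```python
# Illustrative check, assuming this module is importable as boto.glacier.utils.
from boto.glacier.utils import minimum_part_size

archive_size = 50 * 1024 ** 3           # 50 GiB: too big for 10,000 x 4 MiB parts
part = minimum_part_size(archive_size)
assert part == 8 * 1024 * 1024          # the next power-of-two part size that fits
```
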
```python
def chunk_hashes(bytestring, chunk_size=_MEGABYTE):
    """Return the SHA-256 digest of each ``chunk_size`` chunk of ``bytestring``."""
    chunk_count = int(math.ceil(len(bytestring) / float(chunk_size)))
    hashes = []
    for i in range(chunk_count):
        start = i * chunk_size
        end = (i + 1) * chunk_size
        hashes.append(hashlib.sha256(bytestring[start:end]).digest())
    if not hashes:
        return [hashlib.sha256(b'').digest()]
    return hashes
```

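``chunk_hashes`` hashes the payload one chunk at a time; a payload that is not a multiple of the chunk size gets a shorter final chunk, and an empty payload yields the hash of the empty byte string. A small sanity check, with made-up data:

```python
import hashlib

from boto.glacier.utils import chunk_hashes

data = b"x" * (2 * 1024 * 1024 + 100)          # just over 2 MiB of example data
leaves = chunk_hashes(data)
assert len(leaves) == 3                         # two full 1 MiB chunks plus the tail
assert leaves[0] == hashlib.sha256(b"x" * (1024 * 1024)).digest()
```
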
```python
def tree_hash(fo):
    """
    Given the hash of each 1MB chunk (from chunk_hashes), hash together
    adjacent pairs of hashes until only one hash remains; i.e., build a
    tree of hashes and return its root.
    """
    hashes = []
    hashes.extend(fo)
    while len(hashes) > 1:
        new_hashes = []
        while True:
            if len(hashes) > 1:
                first = hashes.pop(0)
                second = hashes.pop(0)
                new_hashes.append(hashlib.sha256(first + second).digest())
            elif len(hashes) == 1:
                only = hashes.pop(0)
                new_hashes.append(only)
            else:
                break
        hashes.extend(new_hashes)
    return hashes[0]
```

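``tree_hash`` pairs leaves left to right and carries an odd trailing hash up unchanged, so for three 1 MiB chunks the root is ``sha256(sha256(h1 + h2) + h3)``. A sketch of that case, again with made-up data:

```python
import hashlib

from boto.glacier.utils import chunk_hashes, tree_hash

h1, h2, h3 = chunk_hashes(b"a" * (3 * 1024 * 1024))      # three 1 MiB leaf hashes
expected = hashlib.sha256(hashlib.sha256(h1 + h2).digest() + h3).digest()
assert tree_hash([h1, h2, h3]) == expected
```
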
```python
def compute_hashes_from_fileobj(fileobj, chunk_size=1024 * 1024):
    """Compute the linear and tree hash from a fileobj.

    This function will compute the linear/tree hash of a fileobj
    in a single pass through the fileobj.

    :param fileobj: A file-like object.

    :param chunk_size: The size of the chunks to use for the tree
        hash.  This is also the buffer size used to read from
        `fileobj`.

    :rtype: tuple
    :return: A tuple of (linear_hash, tree_hash).  Both hashes
        are returned in hex.

    """
    # On Python 3, require the file object to be opened in binary mode.
    if six.PY3 and hasattr(fileobj, 'mode') and 'b' not in fileobj.mode:
        raise ValueError('File-like object must be opened in binary mode!')

    linear_hash = hashlib.sha256()
    chunks = []
    chunk = fileobj.read(chunk_size)
    while chunk:
        # It's possible to get a file-like object that has no mode (checked
        # above) and returns something other than bytes (e.g. str).  So here
        # we try to catch that and encode to bytes.
        if not isinstance(chunk, bytes):
            chunk = chunk.encode(getattr(fileobj, 'encoding', '') or 'utf-8')
        linear_hash.update(chunk)
        chunks.append(hashlib.sha256(chunk).digest())
        chunk = fileobj.read(chunk_size)
    if not chunks:
        chunks = [hashlib.sha256(b'').digest()]
    return linear_hash.hexdigest(), bytes_to_hex(tree_hash(chunks))
```

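Note that the two halves of the returned tuple are not quite symmetric on Python 3: the linear hash comes from ``hexdigest()`` (a ``str``), while the tree hash goes through ``binascii.hexlify`` (``bytes``). A minimal usage sketch with an in-memory file and an arbitrary example payload:

```python
import binascii
import hashlib
import io

from boto.glacier.utils import chunk_hashes, compute_hashes_from_fileobj, tree_hash

payload = b"hello glacier" * 1000                        # arbitrary example payload
linear, tree = compute_hashes_from_fileobj(io.BytesIO(payload))
assert linear == hashlib.sha256(payload).hexdigest()     # str of hex digits
assert tree == binascii.hexlify(tree_hash(chunk_hashes(payload)))   # bytes of hex digits
```
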
```python
def bytes_to_hex(str_as_bytes):
    return binascii.hexlify(str_as_bytes)


def tree_hash_from_str(str_as_bytes):
    """
    :type str_as_bytes: bytes
    :param str_as_bytes: The bytes for which to compute the tree hash.

    :rtype: bytes
    :return: The computed tree hash, returned as hex.
    """
    return bytes_to_hex(tree_hash(chunk_hashes(str_as_bytes)))
```

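``tree_hash_from_str`` is the convenience wrapper that chains the three helpers above. For a payload of one chunk (1 MiB or less) the tree hash collapses to the plain SHA-256 of the data, which the following illustrative check relies on:

```python
import binascii
import hashlib

from boto.glacier.utils import tree_hash_from_str

payload = b"example payload"          # well under 1 MiB, so there is a single chunk
assert tree_hash_from_str(payload) == binascii.hexlify(hashlib.sha256(payload).digest())
```
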
```python
class ResettingFileSender(object):
    """Send an archive file as a request body, seeking back to the file's
    starting offset afterwards so the request can be retried."""

    def __init__(self, archive):
        self._archive = archive
        self._starting_offset = archive.tell()

    def __call__(self, connection, method, path, body, headers):
        try:
            connection.request(method, path, self._archive, headers)
            return connection.getresponse()
        finally:
            self._archive.seek(self._starting_offset)
```

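``ResettingFileSender`` exists so that a failed or retried request does not leave the archive file positioned at the end: whatever happens inside ``request``/``getresponse``, the ``finally`` block seeks back to the offset recorded at construction time. A stand-alone illustration of that behaviour with a stub connection; ``_StubConnection`` is hypothetical and not part of boto:

```python
import io

from boto.glacier.utils import ResettingFileSender


class _StubConnection:
    """Hypothetical stand-in for an HTTP connection, used only for illustration."""

    def request(self, method, path, body, headers):
        body.read()           # consume the file, moving its position to the end

    def getresponse(self):
        return "ok"


archive = io.BytesIO(b"archive bytes")
sender = ResettingFileSender(archive)
assert sender(_StubConnection(), "POST", "/vault/archives", None, {}) == "ok"
assert archive.tell() == 0    # the finally block restored the starting offset
```
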
