comparison env/lib/python3.7/site-packages/boto/glacier/utils.py @ 5:9b1c78e6ba9c draft default tip

"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author shellac
date Mon, 01 Jun 2020 08:59:25 -0400
parents 79f47841a781
children
comparison
equal deleted inserted replaced
4:79f47841a781 5:9b1c78e6ba9c
1 # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved
2 #
3 # Permission is hereby granted, free of charge, to any person obtaining a
4 # copy of this software and associated documentation files (the
5 # "Software"), to deal in the Software without restriction, including
6 # without limitation the rights to use, copy, modify, merge, publish, dis-
7 # tribute, sublicense, and/or sell copies of the Software, and to permit
8 # persons to whom the Software is furnished to do so, subject to the fol-
9 # lowing conditions:
10 #
11 # The above copyright notice and this permission notice shall be included
12 # in all copies or substantial portions of the Software.
13 #
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
16 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
17 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20 # IN THE SOFTWARE.
21 #
22 import hashlib
23 import math
24 import binascii
25
26 from boto.compat import six
27
28
# Number of bytes in one megabyte (2**20).
_MEGABYTE = 1024 ** 2
# Part size used for multipart uploads unless a larger one is required.
DEFAULT_PART_SIZE = _MEGABYTE * 4
# Upper bound on the number of parts in a single multipart upload.
MAXIMUM_NUMBER_OF_PARTS = 10 * 1000
32
33
def minimum_part_size(size_in_bytes, default_part_size=DEFAULT_PART_SIZE):
    """Calculate the minimum part size needed for a multipart upload.

    Glacier allows a maximum of 10,000 parts per upload.  It also
    states that the maximum archive size is 10,000 * 4 GB, which means
    the part size can range from 1MB to 4GB (provided it is 1MB
    multiplied by a power of 2).

    This function will compute what the minimum part size must be in
    order to upload a file of size ``size_in_bytes``.

    It will first check if ``default_part_size`` is sufficient for
    a part size given the ``size_in_bytes``.  If this is not the case,
    then the smallest part size of the form 1MB * 2**n that can
    accommodate a file of size ``size_in_bytes`` will be returned.

    :param size_in_bytes: Size of the archive to upload, in bytes.

    :param default_part_size: Part size, in bytes, to use whenever it
        is large enough for ``size_in_bytes``.

    :rtype: int
    :return: ``default_part_size`` if it suffices, otherwise the
        computed part size in bytes.

    :raises ValueError: If ``size_in_bytes`` is greater than the
        maximum allowed archive size of 10,000 * 4GB.

    """
    # The default part size (4 MB) will be too small for a very large
    # archive, as there is a limit of 10,000 parts in a multipart upload.
    # This puts the maximum allowed archive size with the default part size
    # at 40,000 MB.  Only search for another size when the default fails.
    if default_part_size * MAXIMUM_NUMBER_OF_PARTS >= size_in_bytes:
        return default_part_size

    if size_in_bytes > 4096 * _MEGABYTE * MAXIMUM_NUMBER_OF_PARTS:
        raise ValueError("File size too large: %s" % size_in_bytes)

    # Double from 1 MB until the archive fits within the part limit.
    # Integer arithmetic keeps the comparison exact, and starting at 1 MB
    # (rather than a fixed 8 MB) yields the true minimum even when the
    # caller passes a default_part_size smaller than 4 MB.
    part_size = _MEGABYTE
    while part_size * MAXIMUM_NUMBER_OF_PARTS < size_in_bytes:
        part_size *= 2
    return part_size
72
73
def chunk_hashes(bytestring, chunk_size=_MEGABYTE):
    """Return the SHA-256 digest of each ``chunk_size`` slice of a string.

    :param bytestring: The data to split into chunks and hash.

    :param chunk_size: Number of bytes per chunk.

    :rtype: list
    :return: One raw (binary) SHA-256 digest per chunk, in order.  When
        there are no chunks, a list holding only the digest of the
        empty byte string is returned.

    """
    total_chunks = int(math.ceil(len(bytestring) / float(chunk_size)))
    digests = [
        hashlib.sha256(
            bytestring[index * chunk_size:(index + 1) * chunk_size]
        ).digest()
        for index in range(total_chunks)
    ]
    # No chunks at all: fall back to the hash of empty input.
    return digests or [hashlib.sha256(b'').digest()]
84
85
def tree_hash(fo):
    """Combine a sequence of chunk digests into a single tree hash.

    Given a hash of each 1MB chunk (from chunk_hashes) this will hash
    together adjacent hashes until it ends up with one big one.  So a
    tree of hashes.

    :param fo: An iterable of raw (binary) digests.  It is copied, so
        the caller's object is not modified.

    :return: The raw (binary) digest at the root of the tree.

    :raises IndexError: If ``fo`` yields no digests.

    """
    level = list(fo)
    while len(level) > 1:
        # Hash each adjacent pair; stepping by index avoids the repeated
        # front-of-list pops, each of which shifts the whole list.
        next_level = [
            hashlib.sha256(level[i] + level[i + 1]).digest()
            for i in range(0, len(level) - 1, 2)
        ]
        # An unpaired trailing digest is promoted to the next level as-is.
        if len(level) % 2:
            next_level.append(level[-1])
        level = next_level
    return level[0]
108
109
def compute_hashes_from_fileobj(fileobj, chunk_size=1024 * 1024):
    """Compute the linear and tree hash from a fileobj.

    Both hashes are produced in a single pass: the file is read
    ``chunk_size`` bytes at a time, each chunk feeding the running
    linear hash and contributing one leaf digest to the tree hash.

    :param fileobj: A file like object.

    :param chunk_size: The size of the chunks to use for the tree
        hash.  This is also the buffer size used to read from
        `fileobj`.

    :rtype: tuple
    :return: A tuple of (linear_hash, tree_hash).  Both hashes
        are returned in hex.

    :raises ValueError: On Python 3, if ``fileobj`` has a ``mode``
        attribute that does not contain ``'b'``.

    """
    # Python 3+, not binary
    if six.PY3 and hasattr(fileobj, 'mode'):
        if 'b' not in fileobj.mode:
            raise ValueError('File-like object must be opened in binary mode!')

    whole_digest = hashlib.sha256()
    leaf_digests = []
    while True:
        block = fileobj.read(chunk_size)
        if not block:
            break
        # A file-like object without a mode slips past the check above
        # and may hand back something other than bytes (e.g. str);
        # encode such data before hashing it.
        if not isinstance(block, bytes):
            codec = getattr(fileobj, 'encoding', '') or 'utf-8'
            block = block.encode(codec)
        whole_digest.update(block)
        leaf_digests.append(hashlib.sha256(block).digest())

    # Nothing was read: the tree hash is built from the empty-input digest.
    if not leaf_digests:
        leaf_digests = [hashlib.sha256(b'').digest()]
    return whole_digest.hexdigest(), bytes_to_hex(tree_hash(leaf_digests))
146
147
def bytes_to_hex(str_as_bytes):
    """Return the hexadecimal representation of ``str_as_bytes``.

    :param str_as_bytes: The binary data to convert.

    :return: The hex encoding produced by :mod:`binascii`.

    """
    hex_encoded = binascii.b2a_hex(str_as_bytes)
    return hex_encoded
150
151
def tree_hash_from_str(str_as_bytes):
    """Compute the tree hash of an in-memory string.

    :type str_as_bytes: str
    :param str_as_bytes: The string for which to compute the tree hash.

    :rtype: str
    :return: The computed tree hash, returned as hex.

    """
    leaf_digests = chunk_hashes(str_as_bytes)
    root_digest = tree_hash(leaf_digests)
    return bytes_to_hex(root_digest)
163
164
class ResettingFileSender(object):
    """Callable that sends an archive as a request body, then rewinds it.

    The archive's position at construction time is recorded, and the
    archive is moved back to that position after every call, whether
    the request succeeded or raised.
    """

    def __init__(self, archive):
        """Store ``archive`` and remember its current offset."""
        self._archive = archive
        self._starting_offset = self._archive.tell()

    def __call__(self, connection, method, path, body, headers):
        """Issue the request on ``connection`` and return its response.

        The stored archive is sent as the request body; the ``body``
        argument is accepted but not used.
        """
        try:
            connection.request(method, path, self._archive, headers)
            response = connection.getresponse()
        finally:
            # Always restore the original position.
            self._archive.seek(self._starting_offset)
        return response