env/lib/python3.7/site-packages/boto/glacier/writer.py @ 0:26e78fe6e8c4 (draft)

commit: "planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author: shellac
date:   Sat, 02 May 2020 07:14:21 -0400
# -*- coding: utf-8 -*-
# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/
# Copyright (c) 2012 Robie Basak <robie@justgohome.co.uk>
# Tree hash implementation from Aaron Brady bradya@gmail.com
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import hashlib

from boto.glacier.utils import chunk_hashes, tree_hash, bytes_to_hex
# This import is provided for backwards compatibility. This function is
# now in boto.glacier.utils, but any existing code can still import
# this directly from this module.
from boto.glacier.utils import compute_hashes_from_fileobj


_ONE_MEGABYTE = 1024 * 1024

class _Partitioner(object):
    """Convert variable-size writes into part-sized writes.

    Call write(data) with variable-sized data as needed to write all data.
    Call flush() after all data is written.

    This instance will call send_fn(part_data) as needed in part_size
    pieces, except for the final part, which may be shorter than part_size.
    Make sure to call flush() to ensure that a short final part results in
    a final send_fn call.

    """
    def __init__(self, part_size, send_fn):
        self.part_size = part_size
        self.send_fn = send_fn
        self._buffer = []
        self._buffer_size = 0

    def write(self, data):
        if data == b'':
            return
        self._buffer.append(data)
        self._buffer_size += len(data)
        while self._buffer_size > self.part_size:
            self._send_part()

    def _send_part(self):
        data = b''.join(self._buffer)
        # Put back any data remaining over the part size into the buffer.
        if len(data) > self.part_size:
            self._buffer = [data[self.part_size:]]
            self._buffer_size = len(self._buffer[0])
        else:
            self._buffer = []
            self._buffer_size = 0
        # The part we will send.
        part = data[:self.part_size]
        self.send_fn(part)

    def flush(self):
        if self._buffer_size > 0:
            self._send_part()

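# A minimal sketch of the partitioning behaviour (the names below are
# illustrative only): with part_size=4, three variable-sized writes
# coalesce into one full part plus a short final part on flush().
#
#     parts = []
#     p = _Partitioner(4, parts.append)
#     p.write(b'ab'); p.write(b'cde'); p.write(b'f')
#     p.flush()
#     assert parts == [b'abcd', b'ef']
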
class _Uploader(object):
    """Upload to a Glacier upload_id.

    Call upload_part for each part (in any order) and then close to
    complete the upload.

    """
    def __init__(self, vault, upload_id, part_size, chunk_size=_ONE_MEGABYTE):
        self.vault = vault
        self.upload_id = upload_id
        self.part_size = part_size
        self.chunk_size = chunk_size
        self.archive_id = None

        self._uploaded_size = 0
        self._tree_hashes = []

        self.closed = False

    def _insert_tree_hash(self, index, raw_tree_hash):
        # Grow the list so self._tree_hashes[index] exists; parts may
        # arrive in any order, so missing slots are padded with None.
        list_length = len(self._tree_hashes)
        if index >= list_length:
            self._tree_hashes.extend([None] * (index - list_length + 1))
        self._tree_hashes[index] = raw_tree_hash

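    # For example (illustrative value): starting from an empty list,
    # _insert_tree_hash(2, h) grows the list to [None, None, h], leaving
    # None placeholders for parts 0 and 1 until they arrive.
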
    def upload_part(self, part_index, part_data):
        """Upload a part to Glacier.

        :param part_index: part number where 0 is the first part
        :param part_data: data to upload corresponding to this part

        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        # Compute the hashes Glacier requires for this part.
        part_tree_hash = tree_hash(chunk_hashes(part_data, self.chunk_size))
        self._insert_tree_hash(part_index, part_tree_hash)

        hex_tree_hash = bytes_to_hex(part_tree_hash)
        linear_hash = hashlib.sha256(part_data).hexdigest()
        start = self.part_size * part_index
        content_range = (start,
                         (start + len(part_data)) - 1)
        response = self.vault.layer1.upload_part(self.vault.name,
                                                 self.upload_id,
                                                 linear_hash,
                                                 hex_tree_hash,
                                                 content_range, part_data)
        response.read()
        self._uploaded_size += len(part_data)

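    # For example (illustrative numbers): with a 4 MiB part_size, part
    # index 1 and a full-sized part, content_range is the inclusive byte
    # span (4194304, 8388607).
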
    def skip_part(self, part_index, part_tree_hash, part_length):
        """Skip uploading of a part.

        The final close call needs to calculate the tree hash and total
        size of all uploaded data, so this is the mechanism for resume
        functionality to provide it without actually uploading the data
        again.

        :param part_index: part number where 0 is the first part
        :param part_tree_hash: binary tree_hash of part being skipped
        :param part_length: length of part being skipped

        """
        if self.closed:
            raise ValueError("I/O operation on closed file")
        self._insert_tree_hash(part_index, part_tree_hash)
        self._uploaded_size += part_length

    def close(self):
        if self.closed:
            return
        if None in self._tree_hashes:
            raise RuntimeError("Some parts were not uploaded.")
        # Complete the multipart glacier upload.
        hex_tree_hash = bytes_to_hex(tree_hash(self._tree_hashes))
        response = self.vault.layer1.complete_multipart_upload(
            self.vault.name, self.upload_id, hex_tree_hash,
            self._uploaded_size)
        self.archive_id = response['ArchiveId']
        self.closed = True

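# A minimal sketch of driving _Uploader directly (vault and upload_id are
# assumed to exist already; parts may be sent in any order):
#
#     uploader = _Uploader(vault, upload_id, part_size=4 * _ONE_MEGABYTE)
#     uploader.upload_part(1, second_part_bytes)
#     uploader.upload_part(0, first_part_bytes)
#     uploader.close()
#     print(uploader.archive_id)
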
def generate_parts_from_fobj(fobj, part_size):
    data = fobj.read(part_size)
    while data:
        # Parts must be bytes; only encode if the file object was opened
        # in text mode and therefore yields str.
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        yield data
        data = fobj.read(part_size)

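# For example, an in-memory binary stream split into 4-byte parts:
#
#     import io
#     parts = list(generate_parts_from_fobj(io.BytesIO(b'abcdef'), 4))
#     # parts == [b'abcd', b'ef']
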
def resume_file_upload(vault, upload_id, part_size, fobj, part_hash_map,
                       chunk_size=_ONE_MEGABYTE):
    """Resume upload of a file already part-uploaded to Glacier.

    The resumption of an upload where the part-uploaded section is empty
    is a valid degenerate case that this function can handle. In this
    case, part_hash_map should be an empty dict.

    :param vault: boto.glacier.vault.Vault object.
    :param upload_id: existing Glacier upload id of upload being resumed.
    :param part_size: part size of existing upload.
    :param fobj: file object containing local data to resume. This must
        read from the start of the entire upload, not just from the point
        being resumed. Use fobj.seek(0) to achieve this if necessary.
    :param part_hash_map: {part_index: part_tree_hash, ...} of data
        already uploaded. Each supplied part_tree_hash will be verified
        and the part re-uploaded if there is a mismatch.
    :param chunk_size: chunk size of tree hash calculation. This must be
        1 MiB for Amazon.

    """
    uploader = _Uploader(vault, upload_id, part_size, chunk_size)
    for part_index, part_data in enumerate(
            generate_parts_from_fobj(fobj, part_size)):
        part_tree_hash = tree_hash(chunk_hashes(part_data, chunk_size))
        if (part_index not in part_hash_map or
                part_hash_map[part_index] != part_tree_hash):
            uploader.upload_part(part_index, part_data)
        else:
            uploader.skip_part(part_index, part_tree_hash, len(part_data))
    uploader.close()
    return uploader.archive_id

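# A resume sketch (hypothetical inputs; part_hash_map would normally be
# rebuilt from a prior list-parts response, with each hex tree hash
# converted back to raw bytes):
#
#     with open('backup.tar', 'rb') as f:
#         archive_id = resume_file_upload(
#             vault, upload_id, part_size, f, part_hash_map)
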
class Writer(object):
    """
    Presents a file-like object for writing to an Amazon Glacier
    Archive. The data is written using the multi-part upload API.
    """
    def __init__(self, vault, upload_id, part_size, chunk_size=_ONE_MEGABYTE):
        self.uploader = _Uploader(vault, upload_id, part_size, chunk_size)
        self.partitioner = _Partitioner(part_size, self._upload_part)
        self.closed = False
        self.next_part_index = 0

    def write(self, data):
        if self.closed:
            raise ValueError("I/O operation on closed file")
        self.partitioner.write(data)

    def _upload_part(self, part_data):
        self.uploader.upload_part(self.next_part_index, part_data)
        self.next_part_index += 1

    def close(self):
        if self.closed:
            return
        self.partitioner.flush()
        self.uploader.close()
        self.closed = True

    def get_archive_id(self):
        self.close()
        return self.uploader.archive_id

    @property
    def current_tree_hash(self):
        """
        Returns the tree hash for the data written **so far**.

        The final tree hash is only available once writing is complete.
        """
        return tree_hash(self.uploader._tree_hashes)

    @property
    def current_uploaded_size(self):
        """
        Returns the uploaded size for the data written **so far**.

        The final uploaded size is only available once writing is
        complete.
        """
        return self.uploader._uploaded_size

    @property
    def upload_id(self):
        return self.uploader.upload_id

    @property
    def vault(self):
        return self.uploader.vault
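
# A minimal end-to-end sketch (assumes a configured boto.glacier Vault;
# Vault.create_archive_writer is the usual way to obtain a Writer):
#
#     writer = vault.create_archive_writer(description='nightly backup')
#     with open('backup.tar', 'rb') as f:
#         for chunk in iter(lambda: f.read(_ONE_MEGABYTE), b''):
#             writer.write(chunk)
#     writer.close()
#     archive_id = writer.get_archive_id()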