Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/boto/gs/key.py @ 2:6af9afd405e9 draft
"planemo upload commit 0a63dd5f4d38a1f6944587f52a8cd79874177fc1"
| author | shellac |
|---|---|
| date | Thu, 14 May 2020 14:56:58 -0400 |
| parents | 26e78fe6e8c4 |
| children |
comparison
equal
deleted
inserted
replaced
| 1:75ca89e9b81c | 2:6af9afd405e9 |
|---|---|
| 1 # Copyright 2010 Google Inc. | |
| 2 # | |
| 3 # Permission is hereby granted, free of charge, to any person obtaining a | |
| 4 # copy of this software and associated documentation files (the | |
| 5 # "Software"), to deal in the Software without restriction, including | |
| 6 # without limitation the rights to use, copy, modify, merge, publish, dis- | |
| 7 # tribute, sublicense, and/or sell copies of the Software, and to permit | |
| 8 # persons to whom the Software is furnished to do so, subject to the fol- | |
| 9 # lowing conditions: | |
| 10 # | |
| 11 # The above copyright notice and this permission notice shall be included | |
| 12 # in all copies or substantial portions of the Software. | |
| 13 # | |
| 14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |
| 15 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | |
| 16 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | |
| 17 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
| 18 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 19 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
| 20 # IN THE SOFTWARE. | |
| 21 | |
| 22 import base64 | |
| 23 import binascii | |
| 24 import os | |
| 25 import re | |
| 26 | |
| 27 from boto.compat import StringIO | |
| 28 from boto.exception import BotoClientError | |
| 29 from boto.s3.key import Key as S3Key | |
| 30 from boto.s3.keyfile import KeyFile | |
| 31 from boto.utils import compute_hash | |
| 32 from boto.utils import get_utf8_value | |
| 33 | |
| 34 class Key(S3Key): | |
| 35 """ | |
| 36 Represents a key (object) in a GS bucket. | |
| 37 | |
| 38 :ivar bucket: The parent :class:`boto.gs.bucket.Bucket`. | |
| 39 :ivar name: The name of this Key object. | |
| 40 :ivar metadata: A dictionary containing user metadata that you | |
| 41 wish to store with the object or that has been retrieved from | |
| 42 an existing object. | |
| 43 :ivar cache_control: The value of the `Cache-Control` HTTP header. | |
| 44 :ivar content_type: The value of the `Content-Type` HTTP header. | |
| 45 :ivar content_encoding: The value of the `Content-Encoding` HTTP header. | |
| 46 :ivar content_disposition: The value of the `Content-Disposition` HTTP | |
| 47 header. | |
| 48 :ivar content_language: The value of the `Content-Language` HTTP header. | |
| 49 :ivar etag: The `etag` associated with this object. | |
| 50 :ivar last_modified: The string timestamp representing the last | |
| 51 time this object was modified in GS. | |
| 52 :ivar owner: The ID of the owner of this object. | |
| 53 :ivar storage_class: The storage class of the object. Currently, one of: | |
| 54 STANDARD | DURABLE_REDUCED_AVAILABILITY. | |
| 55 :ivar md5: The MD5 hash of the contents of the object. | |
| 56 :ivar size: The size, in bytes, of the object. | |
| 57 :ivar generation: The generation number of the object. | |
| 58 :ivar metageneration: The generation number of the object metadata. | |
| 59 :ivar encrypted: Whether the object is encrypted while at rest on | |
| 60 the server. | |
| 61 :ivar cloud_hashes: Dictionary of checksums as supplied by the storage | |
| 62 provider. | |
| 63 """ | |
| 64 | |
def __init__(self, bucket=None, name=None, generation=None):
    """
    Create a GS key, optionally pinned to a specific object generation.

    :type bucket: :class:`boto.gs.bucket.Bucket`
    :param bucket: (optional) The parent bucket of this key.

    :type name: string
    :param name: (optional) The name of this key.

    :param generation: (optional) The generation of the object this key
        refers to. Stored as given on ``self.generation``.
    """
    super(Key, self).__init__(bucket=bucket, name=name)
    self.generation = generation
    # __repr__, endElement and handle_version_headers all use the
    # ``metageneration`` spelling; initialise it so that reading it before
    # any response has been parsed does not raise AttributeError.
    self.metageneration = None
    # Historical spelling, kept so existing readers of this attribute
    # continue to work.
    self.meta_generation = None
    self.cloud_hashes = {}
    self.component_count = None
| 71 | |
def __repr__(self):
    """
    Return ``<Key: bucket,name>`` with a ``#generation.metageneration``
    suffix when both version numbers are set (truthy). A missing bucket
    is rendered as ``None``.
    """
    version_suffix = ''
    if self.generation and self.metageneration:
        version_suffix = '#%s.%s' % (self.generation, self.metageneration)
    bucket_label = self.bucket.name if self.bucket else 'None'
    return '<Key: %s,%s%s>' % (bucket_label, self.name, version_suffix)
| 81 | |
def endElement(self, name, value, connection):
    """
    Store the text of a closed XML element on this key.

    :type name: string
    :param name: The element name.

    :type value: string
    :param value: The element text.

    :param connection: Unused by this method.

    ``IsLatest`` is stored as a bool, ``Size`` as an int and ``Owner`` is
    ignored. Known element names are mapped to their attribute name; any
    other element is stored under the element name itself.
    """
    attribute_for = {
        'Key': 'name',
        'ETag': 'etag',
        'LastModified': 'last_modified',
        'StorageClass': 'storage_class',
        'VersionId': 'version_id',
        'Generation': 'generation',
        'MetaGeneration': 'metageneration',
    }
    if name == 'Owner':
        return
    if name == 'IsLatest':
        self.is_latest = (value == 'true')
    elif name == 'Size':
        self.size = int(value)
    else:
        # Unknown elements fall back to the element name as attribute name.
        setattr(self, attribute_for.get(name, name), value)
| 108 | |
def handle_version_headers(self, resp, force=False):
    """
    Record the object's version numbers from a response.

    :param resp: Response object exposing ``getheader(name, default)``.

    :type force: bool
    :param force: Unused by this implementation.

    ``metageneration`` and ``generation`` are set from the
    ``x-goog-metageneration`` and ``x-goog-generation`` headers, or to
    None when a header is absent.
    """
    read_header = resp.getheader
    self.metageneration = read_header('x-goog-metageneration', None)
    self.generation = read_header('x-goog-generation', None)
| 112 | |
def handle_restore_headers(self, response):
    """
    Do nothing: this key type takes no action on restore headers.

    :param response: Ignored.
    """
    return None
| 115 | |
def handle_addl_headers(self, headers):
    """
    Pick up GCS-specific response headers.

    :type headers: iterable of (name, value) pairs
    :param headers: Response headers to inspect. Header names are compared
        exactly as given; unrecognised headers are ignored.

    ``x-goog-hash`` holds comma-separated ``alg=base64digest`` pairs, and
    each decoded digest is stored in ``self.cloud_hashes[alg]``.
    """
    # Headers copied verbatim onto an attribute.
    text_attrs = {
        'x-goog-generation': 'generation',
        # Use x-goog-stored-content-encoding and
        # x-goog-stored-content-length to indicate original content length
        # and encoding, which are transcoding-invariant (so are preferable
        # over using content-encoding and size headers).
        'x-goog-stored-content-encoding': 'content_encoding',
        'x-goog-storage-class': 'storage_class',
    }
    # Headers converted to int before being stored.
    int_attrs = {
        'x-goog-component-count': 'component_count',
        'x-goog-stored-content-length': 'size',
    }
    for header_name, header_value in headers:
        if header_name == 'x-goog-hash':
            for entry in header_value.split(','):
                alg, b64_digest = entry.strip().split('=', 1)
                self.cloud_hashes[alg] = binascii.a2b_base64(b64_digest)
        elif header_name in int_attrs:
            setattr(self, int_attrs[header_name], int(header_value))
        elif header_name in text_attrs:
            setattr(self, text_attrs[header_name], header_value)
| 136 | |
def open_read(self, headers=None, query_args='',
              override_num_retries=None, response_headers=None):
    """
    Open this key for reading

    :type headers: dict
    :param headers: Headers to pass in the web request

    :type query_args: string
    :param query_args: Arguments to pass in the query string
        (ie, 'torrent'). None is treated like the empty string.

    :type override_num_retries: int
    :param override_num_retries: If not None will override configured
        num_retries parameter for underlying GET.

    :type response_headers: dict
    :param response_headers: A dictionary containing HTTP
        headers/values that will override any headers associated
        with the stored object in the response. See
        http://goo.gl/EWOPb for details.
    """
    # For GCS we need to include the object generation in the query args.
    # The rest of the processing is handled in the parent class.
    if self.generation:
        generation_arg = 'generation=%s' % self.generation
        if query_args:
            query_args = '%s&%s' % (query_args, generation_arg)
        else:
            # Covers both '' and None; appending to None with += would
            # raise TypeError.
            query_args = generation_arg
    super(Key, self).open_read(headers=headers, query_args=query_args,
                               override_num_retries=override_num_retries,
                               response_headers=response_headers)
| 168 | |
def get_file(self, fp, headers=None, cb=None, num_cb=10,
             torrent=False, version_id=None, override_num_retries=None,
             response_headers=None, hash_algs=None):
    """
    Download this object into ``fp`` via ``_get_file_internal``.

    When ``self.generation`` is set, a ``generation=<n>`` query argument
    is added to the request; otherwise no query arguments are passed.
    ``torrent`` and ``version_id`` are accepted but not used here. All
    other arguments are forwarded unchanged.
    """
    generation_args = None
    if self.generation:
        generation_args = ['generation=%s' % self.generation]
    forwarded = dict(headers=headers, cb=cb, num_cb=num_cb,
                     override_num_retries=override_num_retries,
                     response_headers=response_headers,
                     hash_algs=hash_algs,
                     query_args=generation_args)
    self._get_file_internal(fp, **forwarded)
| 180 | |
def get_contents_to_file(self, fp, headers=None,
                         cb=None, num_cb=10,
                         torrent=False,
                         version_id=None,
                         res_download_handler=None,
                         response_headers=None,
                         hash_algs=None):
    """
    Retrieve an object from GCS using the name of the Key object as the
    key in GCS. Write the contents of the object to the file pointed
    to by 'fp'. Nothing is done when this key has no bucket.

    :type fp: File -like object
    :param fp: Destination for the object's contents.

    :type headers: dict
    :param headers: additional HTTP headers that will be sent with
        the GET request.

    :type cb: function
    :param cb: a callback function that will be called to report
        progress on the transfer. The callback should accept two
        integer parameters, the first representing the number of
        bytes that have been successfully transmitted and the second
        representing the size of the to be transmitted object.

    :type num_cb: int
    :param num_cb: (optional) If a callback is specified with the
        cb parameter this parameter determines the granularity of
        the callback by defining the maximum number of times the
        callback will be called during the file transfer.

    :type torrent: bool
    :param torrent: Forwarded unchanged to whichever ``get_file`` performs
        the download.

    :type version_id: string
    :param version_id: Forwarded unchanged to whichever ``get_file``
        performs the download.

    :type res_download_handler: ResumableDownloadHandler
    :param res_download_handler: If provided, this handler will
        perform the download. ``response_headers`` is not forwarded to
        the handler.

    :type response_headers: dict
    :param response_headers: A dictionary containing HTTP
        headers/values that will override any headers associated
        with the stored object in the response. See
        http://goo.gl/sMkcC for details.

    :type hash_algs: dictionary
    :param hash_algs: Forwarded unchanged to whichever ``get_file``
        performs the download.
    """
    if self.bucket is None:
        return
    if not res_download_handler:
        self.get_file(fp, headers, cb, num_cb, torrent=torrent,
                      version_id=version_id,
                      response_headers=response_headers,
                      hash_algs=hash_algs)
        return
    res_download_handler.get_file(self, fp, headers, cb, num_cb,
                                  torrent=torrent,
                                  version_id=version_id,
                                  hash_algs=hash_algs)
| 239 | |
def compute_hash(self, fp, algorithm, size=None):
    """
    Hash the contents of ``fp`` and record the hashed size on this key.

    :type fp: file
    :param fp: File pointer to the file to hash. The file
        pointer will be reset to the same position before the
        method returns.

    :type algorithm: zero-argument constructor for hash objects that
        implements update() and digest() (e.g. hashlib.md5)

    :type size: int
    :param size: (optional) The Maximum number of bytes to read
        from the file pointer (fp). This is useful when uploading
        a file in multiple parts where the file is being split
        in place into different parts. Less bytes may be available.

    :rtype: tuple
    :return: ``(hex_digest, b64_digest)``.
    """
    digest_info = compute_hash(fp, size=size, hash_algorithm=algorithm)
    # The module-level helper also reports how many bytes it hashed. That
    # value is stored on the key rather than returned, so this method's
    # two-element return value stays as existing callers expect.
    hex_digest, b64_digest, self.size = digest_info
    return (hex_digest, b64_digest)
| 266 | |
def send_file(self, fp, headers=None, cb=None, num_cb=10,
              query_args=None, chunked_transfer=False, size=None,
              hash_algs=None):
    """
    Upload a file to GCS.

    All arguments are handed unchanged to ``_send_file_internal``.

    :type fp: file
    :param fp: The file pointer to upload. The file pointer must
        point at the offset from which you wish to upload.
        ie. if uploading the full file, it should point at the
        start of the file. Normally when a file is opened for
        reading, the fp will point at the first byte.

    :type headers: dict
    :param headers: The headers to pass along with the PUT request

    :type cb: function
    :param cb: (optional) Progress callback.

    :type num_cb: int
    :param num_cb: (optional) If a callback is specified with the
        cb parameter this parameter determines the granularity of
        the callback by defining the maximum number of times the
        callback will be called during the file
        transfer. Providing a negative integer will cause your
        callback to be called with each buffer read.

    :type query_args: string
    :param query_args: Arguments to pass in the query string.

    :type chunked_transfer: boolean
    :param chunked_transfer: (optional) If true, we use chunked
        Transfer-Encoding.

    :type size: int
    :param size: (optional) The Maximum number of bytes to read
        from the file pointer (fp). This is useful when uploading
        a file in multiple parts where you are splitting the file
        up into different ranges to be uploaded. If not specified,
        the default behaviour is to read all bytes from the file
        pointer. Less bytes may be available.

    :type hash_algs: dictionary
    :param hash_algs: (optional) Dictionary of hash algorithms and
        corresponding hashing class that implements update() and digest().
        Defaults to {'md5': hashlib.md5}.
    """
    upload_options = dict(headers=headers, cb=cb, num_cb=num_cb,
                          query_args=query_args,
                          chunked_transfer=chunked_transfer, size=size,
                          hash_algs=hash_algs)
    self._send_file_internal(fp, **upload_options)
| 316 | |
def delete(self, headers=None):
    """
    Delete this key through its bucket.

    :type headers: dict
    :param headers: (optional) Headers forwarded to the bucket's
        ``delete_key`` call.

    :return: Whatever ``bucket.delete_key`` returns. The key's name,
        ``version_id`` and ``generation`` identify what to delete.
    """
    remove = self.bucket.delete_key
    target = dict(version_id=self.version_id,
                  generation=self.generation,
                  headers=headers)
    return remove(self.name, **target)
| 321 | |
def add_email_grant(self, permission, email_address, headers=None):
    """
    Convenience method that provides a quick way to add an email grant to a
    key. This method retrieves the current ACL, creates a new grant based on
    the parameters passed in, adds that grant to the ACL and then PUT's the
    new ACL back to GS.

    :type permission: string
    :param permission: The permission being granted. Should be one of:
        READ|FULL_CONTROL
        See http://code.google.com/apis/storage/docs/developer-guide.html#authorization
        for more details on permissions.

    :type email_address: string
    :param email_address: The email address associated with the Google
        account to which you are granting the permission.

    :type headers: dict
    :param headers: (optional) Headers passed to both the ACL read and the
        ACL write, matching ``add_group_email_grant``.
    """
    acl = self.get_acl(headers=headers)
    acl.add_email_grant(permission, email_address)
    self.set_acl(acl, headers=headers)
| 342 | |
def add_user_grant(self, permission, user_id, headers=None):
    """
    Convenience method that provides a quick way to add a canonical user
    grant to a key. This method retrieves the current ACL, creates a new
    grant based on the parameters passed in, adds that grant to the ACL and
    then PUT's the new ACL back to GS.

    :type permission: string
    :param permission: The permission being granted. Should be one of:
        READ|FULL_CONTROL
        See http://code.google.com/apis/storage/docs/developer-guide.html#authorization
        for more details on permissions.

    :type user_id: string
    :param user_id: The canonical user id associated with the GS account to
        which you are granting the permission.

    :type headers: dict
    :param headers: (optional) Headers passed to both the ACL read and the
        ACL write, matching ``add_group_email_grant``.
    """
    acl = self.get_acl(headers=headers)
    acl.add_user_grant(permission, user_id)
    self.set_acl(acl, headers=headers)
| 363 | |
def add_group_email_grant(self, permission, email_address, headers=None):
    """
    Grant a permission on this key to a Google Group identified by email.

    The current ACL is fetched, the group grant is added to it, and the
    modified ACL is written back to GS.

    :type permission: string
    :param permission: The permission being granted. Should be one of:
        READ|FULL_CONTROL
        See http://code.google.com/apis/storage/docs/developer-guide.html#authorization
        for more details on permissions.

    :type email_address: string
    :param email_address: The email address associated with the Google
        Group to which you are granting the permission.

    :type headers: dict
    :param headers: (optional) Headers passed to both the ACL read and the
        ACL write.
    """
    current_acl = self.get_acl(headers=headers)
    current_acl.add_group_email_grant(permission, email_address)
    self.set_acl(current_acl, headers=headers)
| 384 | |
def add_group_grant(self, permission, group_id, headers=None):
    """
    Convenience method that provides a quick way to add a canonical group
    grant to a key. This method retrieves the current ACL, creates a new
    grant based on the parameters passed in, adds that grant to the ACL and
    then PUT's the new ACL back to GS.

    :type permission: string
    :param permission: The permission being granted. Should be one of:
        READ|FULL_CONTROL
        See http://code.google.com/apis/storage/docs/developer-guide.html#authorization
        for more details on permissions.

    :type group_id: string
    :param group_id: The canonical group id associated with the Google
        Groups account you are granting the permission to.

    :type headers: dict
    :param headers: (optional) Headers passed to both the ACL read and the
        ACL write, matching ``add_group_email_grant``.
    """
    acl = self.get_acl(headers=headers)
    acl.add_group_grant(permission, group_id)
    self.set_acl(acl, headers=headers)
| 405 | |
def set_contents_from_file(self, fp, headers=None, replace=True,
                           cb=None, num_cb=10, policy=None, md5=None,
                           res_upload_handler=None, size=None, rewind=False,
                           if_generation=None):
    """
    Store an object in GS using the name of the Key object as the
    key in GS and the contents of the file pointed to by 'fp' as the
    contents.

    :type fp: file
    :param fp: The file whose contents are to be uploaded.

    :type headers: dict
    :param headers: (optional) Additional HTTP headers to be sent with the
        PUT request. When a dict is supplied it is modified in place (the
        ACL and generation-match headers are added to it).

    :type replace: bool
    :param replace: (optional) If this parameter is False, the method will
        first check to see if an object exists in the bucket with the same
        key. If it does, it won't overwrite it. The default value is True
        which will overwrite the object.

    :type cb: function
    :param cb: (optional) Callback function that will be called to report
        progress on the upload. The callback should accept two integer
        parameters, the first representing the number of bytes that have
        been successfully transmitted to GS and the second representing the
        total number of bytes that need to be transmitted.

    :type num_cb: int
    :param num_cb: (optional) If a callback is specified with the cb
        parameter, this parameter determines the granularity of the callback
        by defining the maximum number of times the callback will be called
        during the file transfer.

    :type policy: :class:`boto.gs.acl.CannedACLStrings`
    :param policy: (optional) A canned ACL policy that will be applied to
        the new key in GS.

    :type md5: tuple
    :param md5: (optional) A tuple containing the hexdigest version of the
        MD5 checksum of the file as the first element and the
        Base64-encoded version of the plain checksum as the second element.
        This is the same format returned by the compute_md5 method.

        If you need to compute the MD5 for any reason prior to upload, it's
        silly to have to do it twice so this param, if present, will be
        used as the MD5 values of the file. Otherwise, the checksum will be
        computed.

    :type res_upload_handler: :py:class:`boto.gs.resumable_upload_handler.ResumableUploadHandler`
    :param res_upload_handler: (optional) If provided, this handler will
        perform the upload.

    :type size: int
    :param size: (optional) The Maximum number of bytes to read from the
        file pointer (fp). This is useful when uploading a file in multiple
        parts where you are splitting the file up into different ranges to
        be uploaded. If not specified, the default behaviour is to read all
        bytes from the file pointer. Less bytes may be available.

        Notes:

            1. The "size" parameter currently cannot be used when a
               resumable upload handler is given but is still useful for
               uploading part of a file as implemented by the parent class.
            2. At present Google Cloud Storage does not support multipart
               uploads.

    :type rewind: bool
    :param rewind: (optional) If True, the file pointer (fp) will be
        rewound to the start before any bytes are read from it. The default
        behaviour is False which reads from the current position of the
        file pointer (fp).

    :type if_generation: int
    :param if_generation: (optional) If set to a generation number, the
        object will only be written to if its current generation number is
        this value. If set to the value 0, the object will only be written
        if it doesn't already exist.

    :return: None. NOTE(review): this docstring previously promised the
        number of bytes written, but no code path in this method returns a
        value.

    :raises BotoClientError: if both ``res_upload_handler`` and a truthy
        ``size`` are given.
    :raises AttributeError: if ``rewind`` is False, ``fp`` is not a
        KeyFile, and ``fp`` is positioned at a non-zero end of file.

    TODO: At some point we should refactor the Bucket and Key classes,
    to move functionality common to all providers into a parent class,
    and provider-specific functionality into subclasses (rather than
    just overriding/sharing code the way it currently works).
    """
    provider = self.bucket.connection.provider
    if res_upload_handler and size:
        # could use size instead of file_length if provided but...
        raise BotoClientError(
            '"size" param not supported for resumable uploads.')
    headers = headers or {}
    if policy:
        headers[provider.acl_header] = policy

    if rewind:
        # caller requests reading from beginning of fp.
        fp.seek(0, os.SEEK_SET)
    else:
        # The following seek/tell/seek logic is intended
        # to detect applications using the older interface to
        # set_contents_from_file(), which automatically rewound the
        # file each time the Key was reused. This changed with commit
        # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads
        # split into multiple parts and uploaded in parallel, and at
        # the time of that commit this check was added because otherwise
        # older programs would get a success status and upload an empty
        # object. Unfortunately, it's very inefficient for fp's implemented
        # by KeyFile (used, for example, by gsutil when copying between
        # providers). So, we skip the check for the KeyFile case.
        # TODO: At some point consider removing this seek/tell/seek
        # logic, after enough time has passed that it's unlikely any
        # programs remain that assume the older auto-rewind interface.
        if not isinstance(fp, KeyFile):
            spos = fp.tell()
            fp.seek(0, os.SEEK_END)
            if fp.tell() == spos:
                # fp was already at EOF; an empty file (EOF == offset 0)
                # is still allowed through by the check below.
                fp.seek(0, os.SEEK_SET)
                if fp.tell() != spos:
                    # Raise an exception as this is likely a programming
                    # error whereby there is data before the fp but nothing
                    # after it.
                    fp.seek(spos)
                    raise AttributeError('fp is at EOF. Use rewind option '
                                         'or seek() to data start.')
            # seek back to the correct position.
            fp.seek(spos)

    if hasattr(fp, 'name'):
        self.path = fp.name
    if self.bucket is not None:
        if isinstance(fp, KeyFile):
            # Avoid EOF seek for KeyFile case as it's very inefficient.
            key = fp.getkey()
            size = key.size - fp.tell()
            self.size = size
            # At present both GCS and S3 use MD5 for the etag for
            # non-multipart-uploaded objects. If the etag is 32 hex
            # chars use it as an MD5, to avoid having to read the file
            # twice while transferring.
            if (re.match('^"[a-fA-F0-9]{32}"$', key.etag)):
                etag = key.etag.strip('"')
                md5 = (etag, base64.b64encode(binascii.unhexlify(etag)))
        if size:
            self.size = size
        else:
            # If md5 is provided, still need to size so
            # calculate based on bytes to end of content
            # (a size of 0 also takes this branch, since the test above
            # is a truthiness check).
            spos = fp.tell()
            fp.seek(0, os.SEEK_END)
            self.size = fp.tell() - spos
            fp.seek(spos)
            size = self.size

        if md5 is None:
            md5 = self.compute_md5(fp, size)
        self.md5 = md5[0]
        self.base64md5 = md5[1]

        # An unnamed key is named after the hex MD5 of its content.
        if self.name is None:
            self.name = self.md5

        if not replace:
            # Leave an existing object untouched.
            if self.bucket.lookup(self.name):
                return

        if if_generation is not None:
            headers['x-goog-if-generation-match'] = str(if_generation)

        if res_upload_handler:
            res_upload_handler.send_file(self, fp, headers, cb, num_cb)
        else:
            # Not a resumable transfer so use basic send_file mechanism.
            self.send_file(fp, headers, cb, num_cb, size=size)
| 583 | |
def set_contents_from_filename(self, filename, headers=None, replace=True,
                               cb=None, num_cb=10, policy=None, md5=None,
                               reduced_redundancy=None,
                               res_upload_handler=None,
                               if_generation=None):
    """
    Store an object in GS using the name of the Key object as the
    key in GS and the contents of the file named by 'filename'.
    The file is opened in binary mode and handed to
    set_contents_from_file; see that method for details about the
    parameters.

    :type filename: string
    :param filename: The name of the file that you want to put onto GS.

    :type headers: dict
    :param headers: (optional) Additional headers to pass along with the
        request to GS.

    :type replace: bool
    :param replace: (optional) If True, replaces the contents of the file
        if it already exists.

    :type cb: function
    :param cb: (optional) Callback function that will be called to report
        progress on the upload. The callback should accept two integer
        parameters, the first representing the number of bytes that have
        been successfully transmitted to GS and the second representing the
        total number of bytes that need to be transmitted.

    :type num_cb: int
    :param num_cb: (optional) If a callback is specified with the cb
        parameter this parameter determines the granularity of the callback
        by defining the maximum number of times the callback will be called
        during the file transfer.

    :type policy: :py:attribute:`boto.gs.acl.CannedACLStrings`
    :param policy: (optional) A canned ACL policy that will be applied to
        the new key in GS.

    :type md5: tuple
    :param md5: (optional) A tuple containing the hexdigest version of the
        MD5 checksum of the file as the first element and the
        Base64-encoded version of the plain checksum as the second element.
        This is the same format returned by the compute_md5 method.
        If present it is used instead of computing the checksum again.

    :param reduced_redundancy: Accepted but not used by this method.

    :type res_upload_handler: :py:class:`boto.gs.resumable_upload_handler.ResumableUploadHandler`
    :param res_upload_handler: (optional) If provided, this handler will
        perform the upload.

    :type if_generation: int
    :param if_generation: (optional) If set to a generation number, the
        object will only be written to if its current generation number is
        this value. If set to the value 0, the object will only be written
        if it doesn't already exist.
    """
    # New content invalidates any hashes computed earlier.
    self.local_hashes = {}

    with open(filename, 'rb') as source:
        upload_args = (source, headers, replace, cb, num_cb,
                       policy, md5, res_upload_handler)
        self.set_contents_from_file(*upload_args,
                                    if_generation=if_generation)
| 652 | |
def set_contents_from_string(self, s, headers=None, replace=True,
                             cb=None, num_cb=10, policy=None, md5=None,
                             if_generation=None):
    """
    Store an object in GCS using the name of the Key object as the
    key in GCS and the string 's' as the contents.
    See set_contents_from_file method for details about the
    parameters.

    :type s: string
    :param s: The content to upload. It is passed through
        ``get_utf8_value`` and wrapped in an in-memory file object.

    :type headers: dict
    :param headers: Additional headers to pass along with the
        request to GCS.

    :type replace: bool
    :param replace: If True, replaces the contents of the file if
        it already exists.

    :type cb: function
    :param cb: a callback function that will be called to report
        progress on the upload. The callback should accept
        two integer parameters, the first representing the
        number of bytes that have been successfully
        transmitted to GCS and the second representing the
        size of the to be transmitted object.

    :type num_cb: int
    :param num_cb: (optional) If a callback is specified with
        the cb parameter this parameter determines the
        granularity of the callback by defining
        the maximum number of times the callback will
        be called during the file transfer.

    :type policy: :class:`boto.gs.acl.CannedACLStrings`
    :param policy: A canned ACL policy that will be applied to the
        new key in GCS.

    :type md5: tuple
    :param md5: A tuple containing the hexdigest version of the MD5
        checksum of the file as the first element and the
        Base64-encoded version of the plain checksum as the
        second element. This is the same format returned by
        the compute_md5 method. If present, it will be used as the
        MD5 values of the file. Otherwise, the checksum will be
        computed.

    :type if_generation: int
    :param if_generation: (optional) If set to a generation number, the
        object will only be written to if its current generation number is
        this value. If set to the value 0, the object will only be written
        if it doesn't already exist.

    :return: Whatever ``set_contents_from_file`` returns.
    """

    # Clear out any previously computed md5 hashes, since we are setting
    # the content.
    self.md5 = None
    self.base64md5 = None

    fp = StringIO(get_utf8_value(s))
    try:
        return self.set_contents_from_file(fp, headers, replace, cb, num_cb,
                                           policy, md5,
                                           if_generation=if_generation)
    finally:
        # Release the in-memory buffer even when the upload raises.
        fp.close()
| 716 | |
def set_contents_from_stream(self, *args, **kwargs):
    """
    Store an object using the name of the Key object as the key in
    cloud and the contents of the data stream pointed to by 'fp' as
    the contents.

    The stream object is not seekable and total size is not known.
    This has the implication that we can't specify the
    Content-Length and Content-MD5 in the header. So for huge
    uploads, the delay in calculating MD5 is avoided but with a
    penalty of inability to verify the integrity of the uploaded
    data.

    All arguments other than if_generation are forwarded unchanged to
    the parent class implementation.

    :type fp: file
    :param fp: the file whose contents are to be uploaded

    :type headers: dict
    :param headers: additional HTTP headers to be sent with the
        PUT request. The dict supplied by the caller is not modified.

    :type replace: bool
    :param replace: If this parameter is False, the method will first check
        to see if an object exists in the bucket with the same key. If it
        does, it won't overwrite it. The default value is True which will
        overwrite the object.

    :type cb: function
    :param cb: a callback function that will be called to report
        progress on the upload. The callback should accept two integer
        parameters, the first representing the number of bytes that have
        been successfully transmitted to GS and the second representing the
        total number of bytes that need to be transmitted.

    :type num_cb: int
    :param num_cb: (optional) If a callback is specified with the
        cb parameter, this parameter determines the granularity of
        the callback by defining the maximum number of times the
        callback will be called during the file transfer.

    :type policy: :class:`boto.gs.acl.CannedACLStrings`
    :param policy: A canned ACL policy that will be applied to the new key
        in GS.

    :type size: int
    :param size: (optional) The Maximum number of bytes to read from
        the file pointer (fp). This is useful when uploading a
        file in multiple parts where you are splitting the file up
        into different ranges to be uploaded. If not specified,
        the default behaviour is to read all bytes from the file
        pointer. Less bytes may be available.

    :type if_generation: int
    :param if_generation: (optional) If set to a generation number, the
        object will only be written to if its current generation number is
        this value. If set to the value 0, the object will only be written
        if it doesn't already exist.
    """
    # if_generation is handled here and must not reach the parent method.
    if_generation = kwargs.pop('if_generation', None)
    if if_generation is not None:
        # Headers may have been given positionally or by keyword.
        # NOTE(review): assumes headers is the second positional parameter
        # of the parent method (right after fp), matching the order
        # documented above -- confirm against the parent signature.
        positional_headers = len(args) > 1
        supplied = args[1] if positional_headers else kwargs.get('headers')

        # Copy so the caller's dict is left untouched; an explicit
        # headers=None is treated the same as no headers at all.
        headers = dict(supplied) if supplied else {}
        headers['x-goog-if-generation-match'] = str(if_generation)

        if positional_headers:
            # Put the augmented headers back in the same positional slot
            # so the parent does not receive 'headers' twice.
            args = args[:1] + (headers,) + args[2:]
        else:
            kwargs['headers'] = headers
    super(Key, self).set_contents_from_stream(*args, **kwargs)
| 780 | |
def set_acl(self, acl_or_str, headers=None, generation=None,
            if_generation=None, if_metageneration=None):
    """Apply an ACL to this object by delegating to its bucket.

    Nothing is done when this key is not attached to a bucket, and the
    bucket's result is not returned.

    :type acl_or_str: string or :class:`boto.gs.acl.ACL`
    :param acl_or_str: A canned ACL string (see
        :data:`~.gs.acl.CannedACLStrings`) or an ACL object.

    :type headers: dict
    :param headers: Additional headers to set during the request.

    :type generation: int
    :param generation: If specified, sets the ACL for a specific generation
        of a versioned object. If not specified, the current version is
        modified.

    :type if_generation: int
    :param if_generation: (optional) If set to a generation number, the acl
        will only be updated if its current generation number is this value.

    :type if_metageneration: int
    :param if_metageneration: (optional) If set to a metageneration number,
        the acl will only be updated if its current metageneration number is
        this value.
    """
    owner = self.bucket
    if owner is None:
        return

    # Everything except the ACL and the object name goes by keyword.
    options = {
        'headers': headers,
        'generation': generation,
        'if_generation': if_generation,
        'if_metageneration': if_metageneration,
    }
    owner.set_acl(acl_or_str, self.name, **options)
| 811 | |
def get_acl(self, headers=None, generation=None):
    """Fetch the ACL of this object through its bucket.

    :param dict headers: Additional headers to set during the request.

    :param int generation: If specified, gets the ACL for a specific
        generation of a versioned object. If not specified, the current
        version is returned.

    :rtype: :class:`.gs.acl.ACL`
    :return: The value returned by the bucket's get_acl, or None when
        this key is not attached to a bucket.
    """
    owner = self.bucket
    if owner is None:
        return None
    return owner.get_acl(self.name, headers=headers, generation=generation)
| 826 | |
def get_xml_acl(self, headers=None, generation=None):
    """Fetch the ACL of this object as a string through its bucket.

    :param dict headers: Additional headers to set during the request.

    :param int generation: If specified, gets the ACL for a specific
        generation of a versioned object. If not specified, the current
        version is returned.

    :rtype: str
    :return: The value returned by the bucket's get_xml_acl, or None when
        this key is not attached to a bucket.
    """
    owner = self.bucket
    if owner is None:
        return None
    return owner.get_xml_acl(self.name, headers=headers,
                             generation=generation)
| 841 | |
def set_xml_acl(self, acl_str, headers=None, generation=None,
                if_generation=None, if_metageneration=None):
    """Set this object's ACL from an XML string via its bucket.

    :type acl_str: string
    :param acl_str: A string containing the ACL XML.

    :type headers: dict
    :param headers: Additional headers to set during the request.

    :type generation: int
    :param generation: If specified, sets the ACL for a specific generation
        of a versioned object. If not specified, the current version is
        modified.

    :type if_generation: int
    :param if_generation: (optional) If set to a generation number, the acl
        will only be updated if its current generation number is this value.

    :type if_metageneration: int
    :param if_metageneration: (optional) If set to a metageneration number,
        the acl will only be updated if its current metageneration number is
        this value.

    :return: The value returned by the bucket's set_xml_acl, or None when
        this key is not attached to a bucket.
    """
    owner = self.bucket
    if owner is None:
        return None

    # Everything except the ACL XML and the object name goes by keyword.
    options = {
        'headers': headers,
        'generation': generation,
        'if_generation': if_generation,
        'if_metageneration': if_metageneration,
    }
    return owner.set_xml_acl(acl_str, self.name, **options)
| 871 | |
def set_canned_acl(self, acl_str, headers=None, generation=None,
                   if_generation=None, if_metageneration=None):
    """Set this object's ACL to a predefined (canned) value via its bucket.

    :type acl_str: string
    :param acl_str: A canned ACL string. See
        :data:`~.gs.acl.CannedACLStrings`.

    :type headers: dict
    :param headers: Additional headers to set during the request.

    :type generation: int
    :param generation: If specified, sets the ACL for a specific generation
        of a versioned object. If not specified, the current version is
        modified.

    :type if_generation: int
    :param if_generation: (optional) If set to a generation number, the acl
        will only be updated if its current generation number is this value.

    :type if_metageneration: int
    :param if_metageneration: (optional) If set to a metageneration number,
        the acl will only be updated if its current metageneration number is
        this value.

    :return: The value returned by the bucket's set_canned_acl, or None
        when this key is not attached to a bucket.
    """
    owner = self.bucket
    if owner is None:
        return None

    # Everything except the canned ACL and the object name goes by keyword.
    options = {
        'headers': headers,
        'generation': generation,
        'if_generation': if_generation,
        'if_metageneration': if_metageneration,
    }
    return owner.set_canned_acl(acl_str, self.name, **options)
| 906 | |
def compose(self, components, content_type=None, headers=None):
    """Create a new object from a sequence of existing objects.

    The content of the object representing this Key will be the
    concatenation of the given object sequence. For more detail, visit

        https://developers.google.com/storage/docs/composite-objects

    :type components: list of Keys
    :param components: List of gs.Keys representing the component objects.
        Every component must live in the same bucket as this key.

    :type content_type: string
    :param content_type: (optional) Content type for the new composite
        object.

    :type headers: dict
    :param headers: (optional) Additional headers to send with the
        request. The dict supplied by the caller is not modified.

    :return: The value of the 'x-goog-generation' response header.

    :raises BotoClientError: if a component belongs to a different bucket.
    :raises: the provider's storage_response_error when the response
        status is outside the 2xx range.
    """
    # Imported locally so this method does not depend on a module-level
    # import being present.
    from xml.sax.saxutils import escape

    compose_req = []
    for key in components:
        if key.bucket.name != self.bucket.name:
            raise BotoClientError(
                'GCS does not support inter-bucket composing')

        generation_tag = ''
        if key.generation:
            generation_tag = ('<Generation>%s</Generation>'
                              % str(key.generation))
        # Escape the object name: '&', '<' and '>' in a name would
        # otherwise make the request body malformed XML.
        compose_req.append('<Component><Name>%s</Name>%s</Component>' %
                           (escape(key.name), generation_tag))
    compose_req_xml = ('<ComposeRequest>%s</ComposeRequest>' %
                       ''.join(compose_req))

    # Work on a copy so adding Content-Type never leaks into the caller's
    # dict.
    headers = dict(headers) if headers else {}
    if content_type:
        headers['Content-Type'] = content_type
    resp = self.bucket.connection.make_request(
        'PUT', get_utf8_value(self.bucket.name), get_utf8_value(self.name),
        headers=headers, query_args='compose',
        data=get_utf8_value(compose_req_xml))
    if resp.status < 200 or resp.status > 299:
        raise self.bucket.connection.provider.storage_response_error(
            resp.status, resp.reason, resp.read())

    # Return the generation so that the result URI can be built with this
    # for automatic parallel uploads.
    return resp.getheader('x-goog-generation')
