diff env/bin/s3put @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author   | shellac
date     | Mon, 01 Jun 2020 08:59:25 -0400
parents  | 79f47841a781
children |
--- a/env/bin/s3put	Thu May 14 16:47:39 2020 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,438 +0,0 @@
-#!/Users/pldms/Development/Projects/2020/david-matthews-galaxy/guppy_basecaller/env/bin/python3
-# Copyright (c) 2006,2007,2008 Mitch Garnaat http://garnaat.org/
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the
-# "Software"), to deal in the Software without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish, dis-
-# tribute, sublicense, and/or sell copies of the Software, and to permit
-# persons to whom the Software is furnished to do so, subject to the fol-
-# lowing conditions:
-#
-# The above copyright notice and this permission notice shall be included
-# in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
-# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
-# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
-# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-#
-import getopt
-import sys
-import os
-import boto
-
-from boto.compat import six
-
-try:
-    # multipart portions copyright Fabian Topfstedt
-    # https://gist.github.com/924094
-
-    import math
-    import mimetypes
-    from multiprocessing import Pool
-    from boto.s3.connection import S3Connection
-    from filechunkio import FileChunkIO
-    multipart_capable = True
-    usage_flag_multipart_capable = """ [--multipart]"""
-    usage_string_multipart_capable = """
-    multipart - Upload files as multiple parts. This needs filechunkio.
-                Requires ListBucket, ListMultipartUploadParts,
-                ListBucketMultipartUploads and PutObject permissions."""
-except ImportError as err:
-    multipart_capable = False
-    usage_flag_multipart_capable = ""
-    if six.PY2:
-        attribute = 'message'
-    else:
-        attribute = 'msg'
-    usage_string_multipart_capable = '\n\n     "' + \
-        getattr(err, attribute)[len('No module named '):] + \
-        '" is missing for multipart support '
-
-
-DEFAULT_REGION = 'us-east-1'
-
-usage_string = """
-SYNOPSIS
-    s3put [-a/--access_key <access_key>] [-s/--secret_key <secret_key>]
-          -b/--bucket <bucket_name> [-c/--callback <num_cb>]
-          [-d/--debug <debug_level>] [-i/--ignore <ignore_dirs>]
-          [-n/--no_op] [-p/--prefix <prefix>] [-k/--key_prefix <key_prefix>]
-          [-q/--quiet] [-g/--grant grant] [-w/--no_overwrite] [-r/--reduced]
-          [--header] [--region <name>] [--host <s3_host>]""" + \
-          usage_flag_multipart_capable + """ path [path...]
-
-    Where
-        access_key - Your AWS Access Key ID.  If not supplied, boto will
-                     use the value of the environment variable
-                     AWS_ACCESS_KEY_ID
-        secret_key - Your AWS Secret Access Key.  If not supplied, boto
-                     will use the value of the environment variable
-                     AWS_SECRET_ACCESS_KEY
-        bucket_name - The name of the S3 bucket the file(s) should be
-                      copied to.
-        path - A path to a directory or file that represents the items
-               to be uploaded.  If the path points to an individual file,
-               that file will be uploaded to the specified bucket.  If the
-               path points to a directory, it will recursively traverse
-               the directory and upload all files to the specified bucket.
-        debug_level - 0 means no debug output (default), 1 means normal
-                      debug output from boto, and 2 means boto debug output
-                      plus request/response output from httplib
-        ignore_dirs - a comma-separated list of directory names that will
-                      be ignored and not uploaded to S3.
-        num_cb - The number of progress callbacks to display.  The default
-                 is zero which means no callbacks.  If you supplied a value
-                 of "-c 10" for example, the progress callback would be
-                 called 10 times for each file transferred.
-        prefix - A file path prefix that will be stripped from the full
-                 path of the file when determining the key name in S3.
-                 For example, if the full path of a file is:
-                     /home/foo/bar/fie.baz
-                 and the prefix is specified as "-p /home/foo/" the
-                 resulting key name in S3 will be:
-                     /bar/fie.baz
-                 The prefix must end in a trailing separator and if it
-                 does not then one will be added.
-        key_prefix - A prefix to be added to the S3 key name, after any
-                     stripping of the file path is done based on the
-                     "-p/--prefix" option.
-        reduced - Use Reduced Redundancy storage
-        grant - A canned ACL policy that will be granted on each file
-                transferred to S3.  The value of provided must be one
-                of the "canned" ACL policies supported by S3:
-                private|public-read|public-read-write|authenticated-read
-        no_overwrite - No files will be overwritten on S3, if the file/key
-                       exists on s3 it will be kept. This is useful for
-                       resuming interrupted transfers. Note this is not a
-                       sync, even if the file has been updated locally if
-                       the key exists on s3 the file on s3 will not be
-                       updated.
-        header - key=value pairs of extra header(s) to pass along in the
-                 request
-        region - Manually set a region for buckets that are not in the US
-                 classic region. Normally the region is autodetected, but
-                 setting this yourself is more efficient.
-        host - Hostname override, for using an endpoint other then AWS S3
-""" + usage_string_multipart_capable + """
-
-
-     If the -n option is provided, no files will be transferred to S3 but
-     informational messages will be printed about what would happen.
-"""
-
-
-def usage(status=1):
-    print(usage_string)
-    sys.exit(status)
-
-
-def submit_cb(bytes_so_far, total_bytes):
-    print('%d bytes transferred / %d bytes total' % (bytes_so_far, total_bytes))
-
-
-def get_key_name(fullpath, prefix, key_prefix):
-    if fullpath.startswith(prefix):
-        key_name = fullpath[len(prefix):]
-    else:
-        key_name = fullpath
-    l = key_name.split(os.sep)
-    return key_prefix + '/'.join(l)
-
-
-def _upload_part(bucketname, aws_key, aws_secret, multipart_id, part_num,
-                 source_path, offset, bytes, debug, cb, num_cb,
-                 amount_of_retries=10):
-    """
-    Uploads a part with retries.
-    """
-    if debug == 1:
-        print("_upload_part(%s, %s, %s)" % (source_path, offset, bytes))
-
-    def _upload(retries_left=amount_of_retries):
-        try:
-            if debug == 1:
-                print('Start uploading part #%d ...' % part_num)
-            conn = S3Connection(aws_key, aws_secret)
-            conn.debug = debug
-            bucket = conn.get_bucket(bucketname)
-            for mp in bucket.get_all_multipart_uploads():
-                if mp.id == multipart_id:
-                    with FileChunkIO(source_path, 'r', offset=offset,
-                                     bytes=bytes) as fp:
-                        mp.upload_part_from_file(fp=fp, part_num=part_num,
-                                                 cb=cb, num_cb=num_cb)
-                    break
-        except Exception as exc:
-            if retries_left:
-                _upload(retries_left=retries_left - 1)
-            else:
-                print('Failed uploading part #%d' % part_num)
-                raise exc
-        else:
-            if debug == 1:
-                print('... Uploaded part #%d' % part_num)
-
-    _upload()
-
-def check_valid_region(conn, region):
-    if conn is None:
-        print('Invalid region (%s)' % region)
-        sys.exit(1)
-
-def multipart_upload(bucketname, aws_key, aws_secret, source_path, keyname,
-                     reduced, debug, cb, num_cb, acl='private', headers={},
-                     guess_mimetype=True, parallel_processes=4,
-                     region=DEFAULT_REGION):
-    """
-    Parallel multipart upload.
-    """
-    conn = boto.s3.connect_to_region(region, aws_access_key_id=aws_key,
-                                     aws_secret_access_key=aws_secret)
-    check_valid_region(conn, region)
-    conn.debug = debug
-    bucket = conn.get_bucket(bucketname)
-
-    if guess_mimetype:
-        mtype = mimetypes.guess_type(keyname)[0] or 'application/octet-stream'
-        headers.update({'Content-Type': mtype})
-
-    mp = bucket.initiate_multipart_upload(keyname, headers=headers,
-                                          reduced_redundancy=reduced)
-
-    source_size = os.stat(source_path).st_size
-    bytes_per_chunk = max(int(math.sqrt(5242880) * math.sqrt(source_size)),
-                          5242880)
-    chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))
-
-    pool = Pool(processes=parallel_processes)
-    for i in range(chunk_amount):
-        offset = i * bytes_per_chunk
-        remaining_bytes = source_size - offset
-        bytes = min([bytes_per_chunk, remaining_bytes])
-        part_num = i + 1
-        pool.apply_async(_upload_part, [bucketname, aws_key, aws_secret, mp.id,
-                                        part_num, source_path, offset, bytes,
-                                        debug, cb, num_cb])
-    pool.close()
-    pool.join()
-
-    if len(mp.get_all_parts()) == chunk_amount:
-        mp.complete_upload()
-        key = bucket.get_key(keyname)
-        key.set_acl(acl)
-    else:
-        mp.cancel_upload()
-
-
-def singlepart_upload(bucket, key_name, fullpath, *kargs, **kwargs):
-    """
-    Single upload.
-    """
-    k = bucket.new_key(key_name)
-    k.set_contents_from_filename(fullpath, *kargs, **kwargs)
-
-
-def expand_path(path):
-    path = os.path.expanduser(path)
-    path = os.path.expandvars(path)
-    return os.path.abspath(path)
-
-
-def main():
-
-    # default values
-    aws_access_key_id = None
-    aws_secret_access_key = None
-    bucket_name = ''
-    ignore_dirs = []
-    debug = 0
-    cb = None
-    num_cb = 0
-    quiet = False
-    no_op = False
-    prefix = '/'
-    key_prefix = ''
-    grant = None
-    no_overwrite = False
-    reduced = False
-    headers = {}
-    host = None
-    multipart_requested = False
-    region = None
-
-    try:
-        opts, args = getopt.getopt(
-            sys.argv[1:], 'a:b:c::d:g:hi:k:np:qs:wr',
-            ['access_key=', 'bucket=', 'callback=', 'debug=', 'help', 'grant=',
-             'ignore=', 'key_prefix=', 'no_op', 'prefix=', 'quiet',
-             'secret_key=', 'no_overwrite', 'reduced', 'header=', 'multipart',
-             'host=', 'region='])
-    except:
-        usage(1)
-
-    # parse opts
-    for o, a in opts:
-        if o in ('-h', '--help'):
-            usage(0)
-        if o in ('-a', '--access_key'):
-            aws_access_key_id = a
-        if o in ('-b', '--bucket'):
-            bucket_name = a
-        if o in ('-c', '--callback'):
-            num_cb = int(a)
-            cb = submit_cb
-        if o in ('-d', '--debug'):
-            debug = int(a)
-        if o in ('-g', '--grant'):
-            grant = a
-        if o in ('-i', '--ignore'):
-            ignore_dirs = a.split(',')
-        if o in ('-n', '--no_op'):
-            no_op = True
-        if o in ('-w', '--no_overwrite'):
-            no_overwrite = True
-        if o in ('-p', '--prefix'):
-            prefix = a
-            if prefix[-1] != os.sep:
-                prefix = prefix + os.sep
-            prefix = expand_path(prefix)
-        if o in ('-k', '--key_prefix'):
-            key_prefix = a
-        if o in ('-q', '--quiet'):
-            quiet = True
-        if o in ('-s', '--secret_key'):
-            aws_secret_access_key = a
-        if o in ('-r', '--reduced'):
-            reduced = True
-        if o == '--header':
-            (k, v) = a.split("=", 1)
-            headers[k] = v
-        if o == '--host':
-            host = a
-        if o == '--multipart':
-            if multipart_capable:
-                multipart_requested = True
-            else:
-                print("multipart upload requested but not capable")
-                sys.exit(4)
-        if o == '--region':
-            regions = boto.s3.regions()
-            for region_info in regions:
-                if region_info.name == a:
-                    region = a
-                    break
-            else:
-                raise ValueError('Invalid region %s specified' % a)
-
-    if len(args) < 1:
-        usage(2)
-
-    if not bucket_name:
-        print("bucket name is required!")
-        usage(3)
-
-    connect_args = {
-        'aws_access_key_id': aws_access_key_id,
-        'aws_secret_access_key': aws_secret_access_key
-    }
-
-    if host:
-        connect_args['host'] = host
-
-    c = boto.s3.connect_to_region(region or DEFAULT_REGION, **connect_args)
-    check_valid_region(c, region or DEFAULT_REGION)
-    c.debug = debug
-    b = c.get_bucket(bucket_name, validate=False)
-
-    # Attempt to determine location and warn if no --host or --region
-    # arguments were passed. Then try to automagically figure out
-    # what should have been passed and fix it.
-    if host is None and region is None:
-        try:
-            location = b.get_location()
-
-            # Classic region will be '', any other will have a name
-            if location:
-                print('Bucket exists in %s but no host or region given!' % location)
-
-                # Override for EU, which is really Ireland according to the docs
-                if location == 'EU':
-                    location = 'eu-west-1'
-
-                print('Automatically setting region to %s' % location)
-
-                # Here we create a new connection, and then take the existing
-                # bucket and set it to use the new connection
-                c = boto.s3.connect_to_region(location, **connect_args)
-                c.debug = debug
-                b.connection = c
-        except Exception as e:
-            if debug > 0:
-                print(e)
-            print('Could not get bucket region info, skipping...')
-
-    existing_keys_to_check_against = []
-    files_to_check_for_upload = []
-
-    for path in args:
-        path = expand_path(path)
-        # upload a directory of files recursively
-        if os.path.isdir(path):
-            if no_overwrite:
-                if not quiet:
-                    print('Getting list of existing keys to check against')
-                for key in b.list(get_key_name(path, prefix, key_prefix)):
-                    existing_keys_to_check_against.append(key.name)
-            for root, dirs, files in os.walk(path):
-                for ignore in ignore_dirs:
-                    if ignore in dirs:
-                        dirs.remove(ignore)
-                for path in files:
-                    if path.startswith("."):
-                        continue
-                    files_to_check_for_upload.append(os.path.join(root, path))
-
-        # upload a single file
-        elif os.path.isfile(path):
-            fullpath = os.path.abspath(path)
-            key_name = get_key_name(fullpath, prefix, key_prefix)
-            files_to_check_for_upload.append(fullpath)
-            existing_keys_to_check_against.append(key_name)
-
-        # we are trying to upload something unknown
-        else:
-            print("I don't know what %s is, so i can't upload it" % path)
-
-    for fullpath in files_to_check_for_upload:
-        key_name = get_key_name(fullpath, prefix, key_prefix)
-
-        if no_overwrite and key_name in existing_keys_to_check_against:
-            if b.get_key(key_name):
-                if not quiet:
-                    print('Skipping %s as it exists in s3' % fullpath)
-                continue
-
-        if not quiet:
-            print('Copying %s to %s/%s' % (fullpath, bucket_name, key_name))
-
-        if not no_op:
-            # 0-byte files don't work and also don't need multipart upload
-            if os.stat(fullpath).st_size != 0 and multipart_capable and \
-                    multipart_requested:
-                multipart_upload(bucket_name, aws_access_key_id,
-                                 aws_secret_access_key, fullpath, key_name,
-                                 reduced, debug, cb, num_cb,
-                                 grant or 'private', headers,
-                                 region=region or DEFAULT_REGION)
-            else:
-                singlepart_upload(b, key_name, fullpath, cb=cb, num_cb=num_cb,
-                                  policy=grant, reduced_redundancy=reduced,
-                                  headers=headers)
-
-if __name__ == "__main__":
-    main()
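
The usage text in the removed script explains how -p/--prefix and -k/--key_prefix shape the S3 key name. As a quick way to check that behaviour outside the script, here is a minimal sketch that mirrors its get_key_name logic; the paths and the 'run1/' key prefix are illustrative values only, not anything shipped in this repository.

import os


def get_key_name(fullpath, prefix, key_prefix):
    # Mirror of the removed script's key naming: strip the -p/--prefix from
    # the local path, join the remaining components with '/', and prepend
    # the -k/--key_prefix.
    if fullpath.startswith(prefix):
        key_name = fullpath[len(prefix):]
    else:
        key_name = fullpath
    return key_prefix + '/'.join(key_name.split(os.sep))


# Example from the usage text: -p /home/foo/ strips the leading directories.
print(get_key_name('/home/foo/bar/fie.baz', '/home/foo/', ''))       # bar/fie.baz
print(get_key_name('/home/foo/bar/fie.baz', '/home/foo/', 'run1/'))  # run1/bar/fie.baz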
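For multipart uploads, the script sizes each part as max(sqrt(5242880) * sqrt(source_size), 5242880), so parts grow with the square root of the file size and never fall below the 5 MiB S3 minimum. The sketch below only reproduces that arithmetic so the resulting part size and count can be inspected; the 1 GiB figure is a made-up example.

import math

FIVE_MIB = 5242880  # minimum part size used by the removed multipart_upload()


def plan_parts(source_size):
    # Same sizing rule as the removed multipart_upload(): part size grows
    # with sqrt(file size) but never drops below 5 MiB.
    bytes_per_chunk = max(int(math.sqrt(FIVE_MIB) * math.sqrt(source_size)),
                          FIVE_MIB)
    chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))
    return bytes_per_chunk, chunk_amount


# A hypothetical 1 GiB file works out to 15 parts of roughly 72 MiB each.
print(plan_parts(1024 ** 3))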