diff env/lib/python3.7/site-packages/boto/datapipeline/layer1.py @ 5:9b1c78e6ba9c draft default tip
"planemo upload commit 6c0a8142489327ece472c84e558c47da711a9142"
author   | shellac
date     | Mon, 01 Jun 2020 08:59:25 -0400
parents  | 79f47841a781
children |
--- a/env/lib/python3.7/site-packages/boto/datapipeline/layer1.py  Thu May 14 16:47:39 2020 -0400
+++ /dev/null  Thu Jan 01 00:00:00 1970 +0000
@@ -1,639 +0,0 @@

# Copyright (c) 2013 Amazon.com, Inc. or its affiliates. All Rights Reserved
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#

import boto
from boto.compat import json
from boto.connection import AWSQueryConnection
from boto.regioninfo import RegionInfo
from boto.exception import JSONResponseError
from boto.datapipeline import exceptions


class DataPipelineConnection(AWSQueryConnection):
    """
    This is the AWS Data Pipeline API Reference. This guide provides
    descriptions and samples of the AWS Data Pipeline API.

    AWS Data Pipeline is a web service that configures and manages a
    data-driven workflow called a pipeline. AWS Data Pipeline handles
    the details of scheduling and ensuring that data dependencies are
    met so your application can focus on processing the data.

    The AWS Data Pipeline API implements two main sets of
    functionality. The first set of actions configures the pipeline in
    the web service. You call these actions to create a pipeline and
    define data sources, schedules, dependencies, and the transforms
    to be performed on the data.

    The second set of actions is used by a task runner application
    that calls the AWS Data Pipeline API to receive the next task
    ready for processing. The logic for performing the task, such as
    querying the data, running data analysis, or converting the data
    from one format to another, is contained within the task runner.
    The task runner performs the task assigned to it by the web
    service, reporting progress to the web service as it does so. When
    the task is done, the task runner reports the final success or
    failure of the task to the web service.

    AWS Data Pipeline provides an open-source implementation of a task
    runner called AWS Data Pipeline Task Runner. AWS Data Pipeline
    Task Runner provides logic for common data management scenarios,
    such as performing database queries and running data analysis
    using Amazon Elastic MapReduce (Amazon EMR). You can use AWS Data
    Pipeline Task Runner as your task runner, or you can write your
    own task runner to provide custom data management.

    The AWS Data Pipeline API uses the Signature Version 4 protocol
    for signing requests. For more information about how to sign a
    request with this protocol, see `Signature Version 4 Signing
    Process`_. In the code examples in this reference, the Signature
    Version 4 request parameters are represented as AuthParams.
    """
    APIVersion = "2012-10-29"
    DefaultRegionName = "us-east-1"
    DefaultRegionEndpoint = "datapipeline.us-east-1.amazonaws.com"
    ServiceName = "DataPipeline"
    TargetPrefix = "DataPipeline"
    ResponseError = JSONResponseError

    _faults = {
        "PipelineDeletedException": exceptions.PipelineDeletedException,
        "InvalidRequestException": exceptions.InvalidRequestException,
        "TaskNotFoundException": exceptions.TaskNotFoundException,
        "PipelineNotFoundException": exceptions.PipelineNotFoundException,
        "InternalServiceError": exceptions.InternalServiceError,
    }

    def __init__(self, **kwargs):
        region = kwargs.pop('region', None)
        if not region:
            region = RegionInfo(self, self.DefaultRegionName,
                                self.DefaultRegionEndpoint)
        kwargs['host'] = region.endpoint
        super(DataPipelineConnection, self).__init__(**kwargs)
        self.region = region

    def _required_auth_capability(self):
        return ['hmac-v4']
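As a quick orientation, a minimal usage sketch (illustrative only, not part of the module): credentials are assumed to come from the standard boto sources (environment variables or a boto config file), and the region falls back to the us-east-1 default defined by the class attributes above.

    from boto.datapipeline.layer1 import DataPipelineConnection

    conn = DataPipelineConnection()   # default region/endpoint from the class attributes
    response = conn.list_pipelines()  # parsed JSON response as a dict
    for summary in response.get('pipelineIdList', []):
        print(summary['id'], summary.get('name'))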
    def activate_pipeline(self, pipeline_id):
        """
        Validates a pipeline and initiates processing. If the pipeline
        does not pass validation, activation fails.

        Call this action to start processing pipeline tasks of a
        pipeline you've created using the CreatePipeline and
        PutPipelineDefinition actions. A pipeline cannot be modified
        after it has been successfully activated.

        :type pipeline_id: string
        :param pipeline_id: The identifier of the pipeline to activate.

        """
        params = {'pipelineId': pipeline_id, }
        return self.make_request(action='ActivatePipeline',
                                 body=json.dumps(params))

    def create_pipeline(self, name, unique_id, description=None):
        """
        Creates a new empty pipeline. When this action succeeds, you
        can then use the PutPipelineDefinition action to populate the
        pipeline.

        :type name: string
        :param name: The name of the new pipeline. You can use the same name
            for multiple pipelines associated with your AWS account, because
            AWS Data Pipeline assigns each new pipeline a unique pipeline
            identifier.

        :type unique_id: string
        :param unique_id: A unique identifier that you specify. This
            identifier is not the same as the pipeline identifier assigned
            by AWS Data Pipeline. You are responsible for defining the
            format and ensuring the uniqueness of this identifier. You use
            this parameter to ensure idempotency during repeated calls to
            CreatePipeline. For example, if the first call to CreatePipeline
            does not return a clear success, you can pass in the same unique
            identifier and pipeline name combination on a subsequent call to
            CreatePipeline. CreatePipeline ensures that if a pipeline
            already exists with the same name and unique identifier, a new
            pipeline will not be created. Instead, you'll receive the
            pipeline identifier from the previous attempt. The uniqueness of
            the name and unique identifier combination is scoped to the AWS
            account or IAM user credentials.

        :type description: string
        :param description: The description of the new pipeline.

        """
        params = {'name': name, 'uniqueId': unique_id, }
        if description is not None:
            params['description'] = description
        return self.make_request(action='CreatePipeline',
                                 body=json.dumps(params))

    def delete_pipeline(self, pipeline_id):
        """
        Permanently deletes a pipeline, its pipeline definition, and
        its run history. You cannot query or restore a deleted
        pipeline. AWS Data Pipeline will attempt to cancel instances
        associated with the pipeline that are currently being
        processed by task runners. Deleting a pipeline cannot be
        undone.

        To temporarily pause a pipeline instead of deleting it, call
        SetStatus with the status set to `PAUSE` on individual
        components. Components that are paused by SetStatus can be
        resumed.

        :type pipeline_id: string
        :param pipeline_id: The identifier of the pipeline to be deleted.

        """
        params = {'pipelineId': pipeline_id, }
        return self.make_request(action='DeletePipeline',
                                 body=json.dumps(params))
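The idempotent create-then-activate flow these docstrings describe, sketched for illustration; the pipeline name, unique id, and description are made-up values:

    from boto.datapipeline.layer1 import DataPipelineConnection

    conn = DataPipelineConnection()

    # Retrying CreatePipeline with the same uniqueId returns the original
    # pipelineId instead of creating a duplicate pipeline.
    result = conn.create_pipeline('nightly-copy', 'nightly-copy-token-001',
                                  description='nightly S3 copy job')
    pipeline_id = result['pipelineId']

    # ...populate the pipeline with put_pipeline_definition() here...

    conn.activate_pipeline(pipeline_id)  # fails if the definition does not validate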
    def describe_objects(self, object_ids, pipeline_id, marker=None,
                         evaluate_expressions=None):
        """
        Returns the object definitions for a set of objects associated
        with the pipeline. Object definitions are composed of a set of
        fields that define the properties of the object.

        :type pipeline_id: string
        :param pipeline_id: Identifier of the pipeline that contains the
            object definitions.

        :type object_ids: list
        :param object_ids: Identifiers of the pipeline objects that contain
            the definitions to be described. You can pass as many as 25
            identifiers in a single call to DescribeObjects.

        :type evaluate_expressions: boolean
        :param evaluate_expressions: Indicates whether any expressions in
            the object should be evaluated when the object descriptions are
            returned.

        :type marker: string
        :param marker: The starting point for the results to be returned.
            The first time you call DescribeObjects, this value should be
            empty. As long as the action returns `HasMoreResults` as `True`,
            you can call DescribeObjects again and pass the marker value
            from the response to retrieve the next set of results.

        """
        params = {
            'pipelineId': pipeline_id,
            'objectIds': object_ids,
        }
        if evaluate_expressions is not None:
            params['evaluateExpressions'] = evaluate_expressions
        if marker is not None:
            params['marker'] = marker
        return self.make_request(action='DescribeObjects',
                                 body=json.dumps(params))

    def describe_pipelines(self, pipeline_ids):
        """
        Retrieves metadata about one or more pipelines. The information
        retrieved includes the name of the pipeline, the pipeline
        identifier, its current state, and the user account that owns
        the pipeline. Using account credentials, you can retrieve
        metadata about pipelines that you or your IAM users have
        created. If you are using an IAM user account, you can
        retrieve metadata about only those pipelines you have read
        permission for.

        To retrieve the full pipeline definition instead of metadata
        about the pipeline, call the GetPipelineDefinition action.

        :type pipeline_ids: list
        :param pipeline_ids: Identifiers of the pipelines to describe. You
            can pass as many as 25 identifiers in a single call to
            DescribePipelines. You can obtain pipeline identifiers by
            calling ListPipelines.

        """
        params = {'pipelineIds': pipeline_ids, }
        return self.make_request(action='DescribePipelines',
                                 body=json.dumps(params))

    def evaluate_expression(self, pipeline_id, expression, object_id):
        """
        Evaluates a string in the context of a specified object. A
        task runner can use this action to evaluate SQL queries stored
        in Amazon S3.

        :type pipeline_id: string
        :param pipeline_id: The identifier of the pipeline.

        :type object_id: string
        :param object_id: The identifier of the object.

        :type expression: string
        :param expression: The expression to evaluate.

        """
        params = {
            'pipelineId': pipeline_id,
            'objectId': object_id,
            'expression': expression,
        }
        return self.make_request(action='EvaluateExpression',
                                 body=json.dumps(params))
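The paginated actions here share the marker/`HasMoreResults` contract; a sketch of draining DescribeObjects (describe_all_objects is an assumed helper, not part of boto, and the camelCase response keys follow the service's JSON API):

    def describe_all_objects(conn, pipeline_id, object_ids):
        # Follow the marker until the service reports no more results.
        marker = None
        objects = []
        while True:
            page = conn.describe_objects(object_ids, pipeline_id, marker=marker)
            objects.extend(page.get('pipelineObjects', []))
            if not page.get('hasMoreResults'):
                return objects
            marker = page['marker']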
- - """ - params = { - 'pipelineId': pipeline_id, - 'objectId': object_id, - 'expression': expression, - } - return self.make_request(action='EvaluateExpression', - body=json.dumps(params)) - - def get_pipeline_definition(self, pipeline_id, version=None): - """ - Returns the definition of the specified pipeline. You can call - GetPipelineDefinition to retrieve the pipeline definition you - provided using PutPipelineDefinition. - - :type pipeline_id: string - :param pipeline_id: The identifier of the pipeline. - - :type version: string - :param version: The version of the pipeline definition to retrieve. - This parameter accepts the values `latest` (default) and `active`. - Where `latest` indicates the last definition saved to the pipeline - and `active` indicates the last definition of the pipeline that was - activated. - - """ - params = {'pipelineId': pipeline_id, } - if version is not None: - params['version'] = version - return self.make_request(action='GetPipelineDefinition', - body=json.dumps(params)) - - def list_pipelines(self, marker=None): - """ - Returns a list of pipeline identifiers for all active - pipelines. Identifiers are returned only for pipelines you - have permission to access. - - :type marker: string - :param marker: The starting point for the results to be returned. The - first time you call ListPipelines, this value should be empty. As - long as the action returns `HasMoreResults` as `True`, you can call - ListPipelines again and pass the marker value from the response to - retrieve the next set of results. - - """ - params = {} - if marker is not None: - params['marker'] = marker - return self.make_request(action='ListPipelines', - body=json.dumps(params)) - - def poll_for_task(self, worker_group, hostname=None, - instance_identity=None): - """ - Task runners call this action to receive a task to perform - from AWS Data Pipeline. The task runner specifies which tasks - it can perform by setting a value for the workerGroup - parameter of the PollForTask call. The task returned by - PollForTask may come from any of the pipelines that match the - workerGroup value passed in by the task runner and that was - launched using the IAM user credentials specified by the task - runner. - - If tasks are ready in the work queue, PollForTask returns a - response immediately. If no tasks are available in the queue, - PollForTask uses long-polling and holds on to a poll - connection for up to a 90 seconds during which time the first - newly scheduled task is handed to the task runner. To - accomodate this, set the socket timeout in your task runner to - 90 seconds. The task runner should not call PollForTask again - on the same `workerGroup` until it receives a response, and - this may take up to 90 seconds. - - :type worker_group: string - :param worker_group: Indicates the type of task the task runner is - configured to accept and process. The worker group is set as a - field on objects in the pipeline when they are created. You can - only specify a single value for `workerGroup` in the call to - PollForTask. There are no wildcard values permitted in - `workerGroup`, the string must be an exact, case-sensitive, match. - - :type hostname: string - :param hostname: The public DNS name of the calling task runner. - - :type instance_identity: dict - :param instance_identity: Identity information for the Amazon EC2 - instance that is hosting the task runner. You can get this value by - calling the URI, `http://169.254.169.254/latest/meta-data/instance- - id`, from the EC2 instance. 
            For more information, go to `Instance Metadata`_ in the Amazon
            Elastic Compute Cloud User Guide. Passing in this value proves
            that your task runner is running on an EC2 instance, and ensures
            the proper AWS Data Pipeline service charges are applied to your
            pipeline.

        """
        params = {'workerGroup': worker_group, }
        if hostname is not None:
            params['hostname'] = hostname
        if instance_identity is not None:
            params['instanceIdentity'] = instance_identity
        return self.make_request(action='PollForTask',
                                 body=json.dumps(params))

    def put_pipeline_definition(self, pipeline_objects, pipeline_id):
        """
        Adds tasks, schedules, and preconditions that control the
        behavior of the pipeline. You can use PutPipelineDefinition to
        populate a new pipeline or to update an existing pipeline that
        has not yet been activated.

        PutPipelineDefinition also validates the configuration as it
        adds it to the pipeline. Changes to the pipeline are saved
        unless one of the following three validation errors exists in
        the pipeline.

        #. An object is missing a name or identifier field.
        #. A string or reference field is empty.
        #. The number of objects in the pipeline exceeds the maximum
           allowed objects.

        Pipeline object definitions are passed to the
        PutPipelineDefinition action and returned by the
        GetPipelineDefinition action.

        :type pipeline_id: string
        :param pipeline_id: The identifier of the pipeline to be configured.

        :type pipeline_objects: list
        :param pipeline_objects: The objects that define the pipeline. These
            will overwrite the existing pipeline definition.

        """
        params = {
            'pipelineId': pipeline_id,
            'pipelineObjects': pipeline_objects,
        }
        return self.make_request(action='PutPipelineDefinition',
                                 body=json.dumps(params))

    def query_objects(self, pipeline_id, sphere, marker=None, query=None,
                      limit=None):
        """
        Queries a pipeline for the names of objects that match a
        specified set of conditions.

        The objects returned by QueryObjects are paginated and then
        filtered by the value you set for query. This means the action
        may return an empty result set with a value set for marker. If
        `HasMoreResults` is set to `True`, you should continue to call
        QueryObjects, passing in the returned value for marker, until
        `HasMoreResults` returns `False`.

        :type pipeline_id: string
        :param pipeline_id: Identifier of the pipeline to be queried for
            object names.

        :type query: dict
        :param query: Query that defines the objects to be returned. The
            Query object can contain a maximum of ten selectors. The
            conditions in the query are limited to top-level String fields
            in the object. These filters can be applied to components,
            instances, and attempts.

        :type sphere: string
        :param sphere: Specifies whether the query applies to components or
            instances. Allowable values: `COMPONENT`, `INSTANCE`, `ATTEMPT`.

        :type marker: string
        :param marker: The starting point for the results to be returned.
            The first time you call QueryObjects, this value should be
            empty. As long as the action returns `HasMoreResults` as `True`,
            you can call QueryObjects again and pass the marker value from
            the response to retrieve the next set of results.

        :type limit: integer
        :param limit: Specifies the maximum number of object names that
            QueryObjects will return in a single call. The default value is
            100.

        """
        params = {'pipelineId': pipeline_id, 'sphere': sphere, }
        if query is not None:
            params['query'] = query
        if marker is not None:
            params['marker'] = marker
        if limit is not None:
            params['limit'] = limit
        return self.make_request(action='QueryObjects',
                                 body=json.dumps(params))
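For illustration, a minimal payload in the wire format PutPipelineDefinition forwards; the object shape (id/name plus a list of key/stringValue or key/refValue fields) follows the service's JSON API, and the id values here are made up:

    pipeline_id = 'df-0123456789abcdef0123'  # made up; use the id from create_pipeline

    default_object = {
        'id': 'Default',
        'name': 'Default',
        'fields': [
            {'key': 'scheduleType', 'stringValue': 'ondemand'},
        ],
    }
    # conn as constructed in the earlier sketches
    conn.put_pipeline_definition([default_object], pipeline_id)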
- - """ - params = {'pipelineId': pipeline_id, 'sphere': sphere, } - if query is not None: - params['query'] = query - if marker is not None: - params['marker'] = marker - if limit is not None: - params['limit'] = limit - return self.make_request(action='QueryObjects', - body=json.dumps(params)) - - def report_task_progress(self, task_id): - """ - Updates the AWS Data Pipeline service on the progress of the - calling task runner. When the task runner is assigned a task, - it should call ReportTaskProgress to acknowledge that it has - the task within 2 minutes. If the web service does not recieve - this acknowledgement within the 2 minute window, it will - assign the task in a subsequent PollForTask call. After this - initial acknowledgement, the task runner only needs to report - progress every 15 minutes to maintain its ownership of the - task. You can change this reporting time from 15 minutes by - specifying a `reportProgressTimeout` field in your pipeline. - If a task runner does not report its status after 5 minutes, - AWS Data Pipeline will assume that the task runner is unable - to process the task and will reassign the task in a subsequent - response to PollForTask. task runners should call - ReportTaskProgress every 60 seconds. - - :type task_id: string - :param task_id: Identifier of the task assigned to the task runner. - This value is provided in the TaskObject that the service returns - with the response for the PollForTask action. - - """ - params = {'taskId': task_id, } - return self.make_request(action='ReportTaskProgress', - body=json.dumps(params)) - - def report_task_runner_heartbeat(self, taskrunner_id, worker_group=None, - hostname=None): - """ - Task runners call ReportTaskRunnerHeartbeat every 15 minutes - to indicate that they are operational. In the case of AWS Data - Pipeline Task Runner launched on a resource managed by AWS - Data Pipeline, the web service can use this call to detect - when the task runner application has failed and restart a new - instance. - - :type taskrunner_id: string - :param taskrunner_id: The identifier of the task runner. This value - should be unique across your AWS account. In the case of AWS Data - Pipeline Task Runner launched on a resource managed by AWS Data - Pipeline, the web service provides a unique identifier when it - launches the application. If you have written a custom task runner, - you should assign a unique identifier for the task runner. - - :type worker_group: string - :param worker_group: Indicates the type of task the task runner is - configured to accept and process. The worker group is set as a - field on objects in the pipeline when they are created. You can - only specify a single value for `workerGroup` in the call to - ReportTaskRunnerHeartbeat. There are no wildcard values permitted - in `workerGroup`, the string must be an exact, case-sensitive, - match. - - :type hostname: string - :param hostname: The public DNS name of the calling task runner. - - """ - params = {'taskrunnerId': taskrunner_id, } - if worker_group is not None: - params['workerGroup'] = worker_group - if hostname is not None: - params['hostname'] = hostname - return self.make_request(action='ReportTaskRunnerHeartbeat', - body=json.dumps(params)) - - def set_status(self, object_ids, status, pipeline_id): - """ - Requests that the status of an array of physical or logical - pipeline objects be updated in the pipeline. This update may - not occur immediately, but is eventually consistent. 
    def set_status(self, object_ids, status, pipeline_id):
        """
        Requests that the status of an array of physical or logical
        pipeline objects be updated in the pipeline. This update may
        not occur immediately, but is eventually consistent. The
        status that can be set depends on the type of object.

        :type pipeline_id: string
        :param pipeline_id: Identifies the pipeline that contains the
            objects.

        :type object_ids: list
        :param object_ids: Identifies an array of objects. The corresponding
            objects can be either physical instances or components, but not
            a mix of both types.

        :type status: string
        :param status: Specifies the status to be set on all the objects in
            `objectIds`. For components, this can be either `PAUSE` or
            `RESUME`. For instances, this can be either `CANCEL`, `RERUN`,
            or `MARK_FINISHED`.

        """
        params = {
            'pipelineId': pipeline_id,
            'objectIds': object_ids,
            'status': status,
        }
        return self.make_request(action='SetStatus',
                                 body=json.dumps(params))

    def set_task_status(self, task_id, task_status, error_id=None,
                        error_message=None, error_stack_trace=None):
        """
        Notifies AWS Data Pipeline that a task is completed and
        provides information about the final status. The task runner
        calls this action regardless of whether the task was
        successful. The task runner does not need to call
        SetTaskStatus for tasks that are canceled by the web service
        during a call to ReportTaskProgress.

        :type task_id: string
        :param task_id: Identifies the task assigned to the task runner.
            This value is set in the TaskObject that is returned by the
            PollForTask action.

        :type task_status: string
        :param task_status: If `FINISHED`, the task successfully completed.
            If `FAILED`, the task ended unsuccessfully. The `FALSE` value is
            used by preconditions.

        :type error_id: string
        :param error_id: If an error occurred during the task, this value
            specifies an id value that represents the error. This value is
            set on the physical attempt object. It is used to display error
            information to the user. It should not start with the string
            "Service_", which is reserved by the system.

        :type error_message: string
        :param error_message: If an error occurred during the task, this
            value specifies a text description of the error. This value is
            set on the physical attempt object. It is used to display error
            information to the user. The web service does not parse this
            value.

        :type error_stack_trace: string
        :param error_stack_trace: If an error occurred during the task, this
            value specifies the stack trace associated with the error. This
            value is set on the physical attempt object. It is used to
            display error information to the user. The web service does not
            parse this value.

        """
        params = {'taskId': task_id, 'taskStatus': task_status, }
        if error_id is not None:
            params['errorId'] = error_id
        if error_message is not None:
            params['errorMessage'] = error_message
        if error_stack_trace is not None:
            params['errorStackTrace'] = error_stack_trace
        return self.make_request(action='SetTaskStatus',
                                 body=json.dumps(params))
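Reporting a final status with error details on failure, sketched for illustration (finish_task and the 'MyCopyError' id are hypothetical; traceback is the standard library module):

    import traceback

    def finish_task(conn, task_id, work):
        try:
            work()
        except Exception as exc:
            conn.set_task_status(task_id, 'FAILED',
                                 error_id='MyCopyError',  # must not start with "Service_"
                                 error_message=str(exc),
                                 error_stack_trace=traceback.format_exc())
        else:
            conn.set_task_status(task_id, 'FINISHED')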
- - """ - params = { - 'pipelineId': pipeline_id, - 'pipelineObjects': pipeline_objects, - } - return self.make_request(action='ValidatePipelineDefinition', - body=json.dumps(params)) - - def make_request(self, action, body): - headers = { - 'X-Amz-Target': '%s.%s' % (self.TargetPrefix, action), - 'Host': self.region.endpoint, - 'Content-Type': 'application/x-amz-json-1.1', - 'Content-Length': str(len(body)), - } - http_request = self.build_base_http_request( - method='POST', path='/', auth_path='/', params={}, - headers=headers, data=body) - response = self._mexe(http_request, sender=None, - override_num_retries=10) - response_body = response.read().decode('utf-8') - boto.log.debug(response_body) - if response.status == 200: - if response_body: - return json.loads(response_body) - else: - json_body = json.loads(response_body) - fault_name = json_body.get('__type', None) - exception_class = self._faults.get(fault_name, self.ResponseError) - raise exception_class(response.status, response.reason, - body=json_body)