diff planemo/lib/python3.7/site-packages/boto/datapipeline/layer1.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author: guerler
date:   Fri, 31 Jul 2020 00:18:57 -0400
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/planemo/lib/python3.7/site-packages/boto/datapipeline/layer1.py Fri Jul 31 00:18:57 2020 -0400
@@ -0,0 +1,639 @@
+# Copyright (c) 2013 Amazon.com, Inc. or its affiliates. All Rights Reserved
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish, dis-
+# tribute, sublicense, and/or sell copies of the Software, and to permit
+# persons to whom the Software is furnished to do so, subject to the fol-
+# lowing conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
+# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+
+import boto
+from boto.compat import json
+from boto.connection import AWSQueryConnection
+from boto.regioninfo import RegionInfo
+from boto.exception import JSONResponseError
+from boto.datapipeline import exceptions
+
+
+class DataPipelineConnection(AWSQueryConnection):
+    """
+    This is the AWS Data Pipeline API Reference. This guide provides
+    descriptions and samples of the AWS Data Pipeline API.
+
+    AWS Data Pipeline is a web service that configures and manages a
+    data-driven workflow called a pipeline. AWS Data Pipeline handles
+    the details of scheduling and ensuring that data dependencies are
+    met so your application can focus on processing the data.
+
+    The AWS Data Pipeline API implements two main sets of
+    functionality. The first set of actions configures the pipeline in
+    the web service. You call these actions to create a pipeline and
+    define data sources, schedules, dependencies, and the transforms
+    to be performed on the data.
+
+    The second set of actions is used by a task runner application
+    that calls the AWS Data Pipeline API to receive the next task
+    ready for processing. The logic for performing the task, such as
+    querying the data, running data analysis, or converting the data
+    from one format to another, is contained within the task runner.
+    The task runner performs the task assigned to it by the web
+    service, reporting progress to the web service as it does so. When
+    the task is done, the task runner reports the final success or
+    failure of the task to the web service.
+
+    AWS Data Pipeline provides an open-source implementation of a task
+    runner called AWS Data Pipeline Task Runner. AWS Data Pipeline
+    Task Runner provides logic for common data management scenarios,
+    such as performing database queries and running data analysis
+    using Amazon Elastic MapReduce (Amazon EMR). You can use AWS Data
+    Pipeline Task Runner as your task runner, or you can write your
+    own task runner to provide custom data management.
+
+    The AWS Data Pipeline API uses the Signature Version 4 protocol
+    for signing requests. For more information about how to sign a
+    request with this protocol, see `Signature Version 4 Signing
+    Process`_. In the code examples in this reference, the Signature
+    Version 4 Request parameters are represented as AuthParams.
+    """
+    APIVersion = "2012-10-29"
+    DefaultRegionName = "us-east-1"
+    DefaultRegionEndpoint = "datapipeline.us-east-1.amazonaws.com"
+    ServiceName = "DataPipeline"
+    TargetPrefix = "DataPipeline"
+    ResponseError = JSONResponseError
+
+    _faults = {
+        "PipelineDeletedException": exceptions.PipelineDeletedException,
+        "InvalidRequestException": exceptions.InvalidRequestException,
+        "TaskNotFoundException": exceptions.TaskNotFoundException,
+        "PipelineNotFoundException": exceptions.PipelineNotFoundException,
+        "InternalServiceError": exceptions.InternalServiceError,
+    }
+
+    def __init__(self, **kwargs):
+        region = kwargs.pop('region', None)
+        if not region:
+            region = RegionInfo(self, self.DefaultRegionName,
+                                self.DefaultRegionEndpoint)
+        kwargs['host'] = region.endpoint
+        super(DataPipelineConnection, self).__init__(**kwargs)
+        self.region = region
+
+    def _required_auth_capability(self):
+        return ['hmac-v4']
+
+    def activate_pipeline(self, pipeline_id):
+        """
+        Validates a pipeline and initiates processing. If the pipeline
+        does not pass validation, activation fails.
+
+        Call this action to start processing pipeline tasks of a
+        pipeline you've created using the CreatePipeline and
+        PutPipelineDefinition actions. A pipeline cannot be modified
+        after it has been successfully activated.
+
+        :type pipeline_id: string
+        :param pipeline_id: The identifier of the pipeline to activate.
+
+        """
+        params = {'pipelineId': pipeline_id, }
+        return self.make_request(action='ActivatePipeline',
+                                 body=json.dumps(params))
+
+    def create_pipeline(self, name, unique_id, description=None):
+        """
+        Creates a new empty pipeline. When this action succeeds, you
+        can then use the PutPipelineDefinition action to populate the
+        pipeline.
+
+        :type name: string
+        :param name: The name of the new pipeline. You can use the same name
+            for multiple pipelines associated with your AWS account, because
+            AWS Data Pipeline assigns each new pipeline a unique pipeline
+            identifier.
+
+        :type unique_id: string
+        :param unique_id: A unique identifier that you specify. This
+            identifier is not the same as the pipeline identifier assigned by
+            AWS Data Pipeline. You are responsible for defining the format
+            and ensuring the uniqueness of this identifier. You use this
+            parameter to ensure idempotency during repeated calls to
+            CreatePipeline. For example, if the first call to CreatePipeline
+            does not return a clear success, you can pass in the same unique
+            identifier and pipeline name combination on a subsequent call to
+            CreatePipeline. CreatePipeline ensures that if a pipeline already
+            exists with the same name and unique identifier, a new pipeline
+            will not be created. Instead, you'll receive the pipeline
+            identifier from the previous attempt. The uniqueness of the name
+            and unique identifier combination is scoped to the AWS account or
+            IAM user credentials.
+
+        :type description: string
+        :param description: The description of the new pipeline.
+
+        """
+        params = {'name': name, 'uniqueId': unique_id, }
+        if description is not None:
+            params['description'] = description
+        return self.make_request(action='CreatePipeline',
+                                 body=json.dumps(params))
+
+    def delete_pipeline(self, pipeline_id):
+        """
+        Permanently deletes a pipeline, its pipeline definition and
+        its run history. You cannot query or restore a deleted
+        pipeline. AWS Data Pipeline will attempt to cancel instances
+        associated with the pipeline that are currently being
+        processed by task runners. Deleting a pipeline cannot be
+        undone.
+
+        To temporarily pause a pipeline instead of deleting it, call
+        SetStatus with the status set to Pause on individual
+        components. Components that are paused by SetStatus can be
+        resumed.
+
+        :type pipeline_id: string
+        :param pipeline_id: The identifier of the pipeline to be deleted.
+
+        """
+        params = {'pipelineId': pipeline_id, }
+        return self.make_request(action='DeletePipeline',
+                                 body=json.dumps(params))
+
+    def describe_objects(self, object_ids, pipeline_id, marker=None,
+                         evaluate_expressions=None):
+        """
+        Returns the object definitions for a set of objects associated
+        with the pipeline. Object definitions are composed of a set of
+        fields that define the properties of the object.
+
+        :type pipeline_id: string
+        :param pipeline_id: Identifier of the pipeline that contains the
+            object definitions.
+
+        :type object_ids: list
+        :param object_ids: Identifiers of the pipeline objects that contain
+            the definitions to be described. You can pass as many as 25
+            identifiers in a single call to DescribeObjects.
+
+        :type evaluate_expressions: boolean
+        :param evaluate_expressions: Indicates whether any expressions in the
+            object should be evaluated when the object descriptions are
+            returned.
+
+        :type marker: string
+        :param marker: The starting point for the results to be returned. The
+            first time you call DescribeObjects, this value should be empty.
+            As long as the action returns `HasMoreResults` as `True`, you can
+            call DescribeObjects again and pass the marker value from the
+            response to retrieve the next set of results.
+
+        """
+        params = {
+            'pipelineId': pipeline_id,
+            'objectIds': object_ids,
+        }
+        if evaluate_expressions is not None:
+            params['evaluateExpressions'] = evaluate_expressions
+        if marker is not None:
+            params['marker'] = marker
+        return self.make_request(action='DescribeObjects',
+                                 body=json.dumps(params))
+
+    def describe_pipelines(self, pipeline_ids):
+        """
+        Retrieves metadata about one or more pipelines. The information
+        retrieved includes the name of the pipeline, the pipeline
+        identifier, its current state, and the user account that owns
+        the pipeline. Using account credentials, you can retrieve
+        metadata about pipelines that you or your IAM users have
+        created. If you are using an IAM user account, you can
+        retrieve metadata about only those pipelines you have read
+        permission for.
+
+        To retrieve the full pipeline definition instead of metadata
+        about the pipeline, call the GetPipelineDefinition action.
+
+        :type pipeline_ids: list
+        :param pipeline_ids: Identifiers of the pipelines to describe. You
+            can pass as many as 25 identifiers in a single call to
+            DescribePipelines. You can obtain pipeline identifiers by calling
+            ListPipelines.
+
+        """
+        params = {'pipelineIds': pipeline_ids, }
+        return self.make_request(action='DescribePipelines',
+                                 body=json.dumps(params))
+
+    def evaluate_expression(self, pipeline_id, expression, object_id):
+        """
+        Evaluates a string in the context of a specified object. A
+        task runner can use this action to evaluate SQL queries stored
+        in Amazon S3.
+
+        :type pipeline_id: string
+        :param pipeline_id: The identifier of the pipeline.
+
+        :type object_id: string
+        :param object_id: The identifier of the object.
+
+        :type expression: string
+        :param expression: The expression to evaluate.
+
+        """
+        params = {
+            'pipelineId': pipeline_id,
+            'objectId': object_id,
+            'expression': expression,
+        }
+        return self.make_request(action='EvaluateExpression',
+                                 body=json.dumps(params))
+
+    def get_pipeline_definition(self, pipeline_id, version=None):
+        """
+        Returns the definition of the specified pipeline. You can call
+        GetPipelineDefinition to retrieve the pipeline definition you
+        provided using PutPipelineDefinition.
+
+        :type pipeline_id: string
+        :param pipeline_id: The identifier of the pipeline.
+
+        :type version: string
+        :param version: The version of the pipeline definition to retrieve.
+            This parameter accepts the values `latest` (default) and
+            `active`, where `latest` indicates the last definition saved to
+            the pipeline and `active` indicates the last definition of the
+            pipeline that was activated.
+
+        """
+        params = {'pipelineId': pipeline_id, }
+        if version is not None:
+            params['version'] = version
+        return self.make_request(action='GetPipelineDefinition',
+                                 body=json.dumps(params))
+
+    def list_pipelines(self, marker=None):
+        """
+        Returns a list of pipeline identifiers for all active
+        pipelines. Identifiers are returned only for pipelines you
+        have permission to access.
+
+        :type marker: string
+        :param marker: The starting point for the results to be returned. The
+            first time you call ListPipelines, this value should be empty. As
+            long as the action returns `HasMoreResults` as `True`, you can
+            call ListPipelines again and pass the marker value from the
+            response to retrieve the next set of results.
+
+        """
+        params = {}
+        if marker is not None:
+            params['marker'] = marker
+        return self.make_request(action='ListPipelines',
+                                 body=json.dumps(params))
+
+    def poll_for_task(self, worker_group, hostname=None,
+                      instance_identity=None):
+        """
+        Task runners call this action to receive a task to perform
+        from AWS Data Pipeline. The task runner specifies which tasks
+        it can perform by setting a value for the workerGroup
+        parameter of the PollForTask call. The task returned by
+        PollForTask may come from any of the pipelines that match the
+        workerGroup value passed in by the task runner and that were
+        launched using the IAM user credentials specified by the task
+        runner.
+
+        If tasks are ready in the work queue, PollForTask returns a
+        response immediately. If no tasks are available in the queue,
+        PollForTask uses long-polling and holds on to a poll
+        connection for up to 90 seconds, during which time the first
+        newly scheduled task is handed to the task runner. To
+        accommodate this, set the socket timeout in your task runner
+        to 90 seconds. The task runner should not call PollForTask
+        again on the same `workerGroup` until it receives a response,
+        and this may take up to 90 seconds.
+
+        :type worker_group: string
+        :param worker_group: Indicates the type of task the task runner is
+            configured to accept and process. The worker group is set as a
+            field on objects in the pipeline when they are created. You can
+            only specify a single value for `workerGroup` in the call to
+            PollForTask. There are no wildcard values permitted in
+            `workerGroup`; the string must be an exact, case-sensitive match.
+
+        :type hostname: string
+        :param hostname: The public DNS name of the calling task runner.
+
+        :type instance_identity: dict
+        :param instance_identity: Identity information for the Amazon EC2
+            instance that is hosting the task runner. You can get this value
+            by calling the URI
+            `http://169.254.169.254/latest/meta-data/instance-id` from the
+            EC2 instance. For more information, go to `Instance Metadata`_
+            in the Amazon Elastic Compute Cloud User Guide. Passing in this
+            value proves that your task runner is running on an EC2 instance,
+            and ensures the proper AWS Data Pipeline service charges are
+            applied to your pipeline.
+
+        """
+        params = {'workerGroup': worker_group, }
+        if hostname is not None:
+            params['hostname'] = hostname
+        if instance_identity is not None:
+            params['instanceIdentity'] = instance_identity
+        return self.make_request(action='PollForTask',
+                                 body=json.dumps(params))
+
+    def put_pipeline_definition(self, pipeline_objects, pipeline_id):
+        """
+        Adds tasks, schedules, and preconditions that control the
+        behavior of the pipeline. You can use PutPipelineDefinition to
+        populate a new pipeline or to update an existing pipeline that
+        has not yet been activated.
+
+        PutPipelineDefinition also validates the configuration as it
+        adds it to the pipeline. Changes to the pipeline are saved
+        unless one of the following three validation errors exists in
+        the pipeline.
+
+        #. An object is missing a name or identifier field.
+        #. A string or reference field is empty.
+        #. The number of objects in the pipeline exceeds the maximum
+           allowed objects.
+
+        Pipeline object definitions are passed to the
+        PutPipelineDefinition action and returned by the
+        GetPipelineDefinition action.
+
+        :type pipeline_id: string
+        :param pipeline_id: The identifier of the pipeline to be configured.
+
+        :type pipeline_objects: list
+        :param pipeline_objects: The objects that define the pipeline. These
+            will overwrite the existing pipeline definition.
+
+        """
+        params = {
+            'pipelineId': pipeline_id,
+            'pipelineObjects': pipeline_objects,
+        }
+        return self.make_request(action='PutPipelineDefinition',
+                                 body=json.dumps(params))
+
+    def query_objects(self, pipeline_id, sphere, marker=None, query=None,
+                      limit=None):
+        """
+        Queries a pipeline for the names of objects that match a
+        specified set of conditions.
+
+        The objects returned by QueryObjects are paginated and then
+        filtered by the value you set for query. This means the action
+        may return an empty result set with a value set for marker. If
+        `HasMoreResults` is set to `True`, you should continue to call
+        QueryObjects, passing in the returned value for marker, until
+        `HasMoreResults` returns `False`.
+
+        :type pipeline_id: string
+        :param pipeline_id: Identifier of the pipeline to be queried for
+            object names.
+
+        :type query: dict
+        :param query: Query that defines the objects to be returned. The
+            Query object can contain a maximum of ten selectors. The
+            conditions in the query are limited to top-level String fields
+            in the object. These filters can be applied to components,
+            instances, and attempts.
+
+        :type sphere: string
+        :param sphere: Specifies whether the query applies to components or
+            instances. Allowable values: `COMPONENT`, `INSTANCE`, `ATTEMPT`.
+
+        :type marker: string
+        :param marker: The starting point for the results to be returned. The
+            first time you call QueryObjects, this value should be empty. As
+            long as the action returns `HasMoreResults` as `True`, you can
+            call QueryObjects again and pass the marker value from the
+            response to retrieve the next set of results.
+
+        :type limit: integer
+        :param limit: Specifies the maximum number of object names that
+            QueryObjects will return in a single call. The default value is
+            100.
+
+        """
+        params = {'pipelineId': pipeline_id, 'sphere': sphere, }
+        if query is not None:
+            params['query'] = query
+        if marker is not None:
+            params['marker'] = marker
+        if limit is not None:
+            params['limit'] = limit
+        return self.make_request(action='QueryObjects',
+                                 body=json.dumps(params))
+
+    def report_task_progress(self, task_id):
+        """
+        Updates the AWS Data Pipeline service on the progress of the
+        calling task runner. When the task runner is assigned a task,
+        it should call ReportTaskProgress to acknowledge that it has
+        the task within 2 minutes. If the web service does not receive
+        this acknowledgement within the 2 minute window, it will
+        assign the task in a subsequent PollForTask call. After this
+        initial acknowledgement, the task runner only needs to report
+        progress every 15 minutes to maintain its ownership of the
+        task. You can change this reporting time from 15 minutes by
+        specifying a `reportProgressTimeout` field in your pipeline.
+        If a task runner does not report its status after 5 minutes,
+        AWS Data Pipeline will assume that the task runner is unable
+        to process the task and will reassign the task in a subsequent
+        response to PollForTask. Task runners should call
+        ReportTaskProgress every 60 seconds.
+
+        :type task_id: string
+        :param task_id: Identifier of the task assigned to the task runner.
+            This value is provided in the TaskObject that the service returns
+            with the response for the PollForTask action.
+
+        """
+        params = {'taskId': task_id, }
+        return self.make_request(action='ReportTaskProgress',
+                                 body=json.dumps(params))
+
+    def report_task_runner_heartbeat(self, taskrunner_id, worker_group=None,
+                                     hostname=None):
+        """
+        Task runners call ReportTaskRunnerHeartbeat every 15 minutes
+        to indicate that they are operational. In the case of AWS Data
+        Pipeline Task Runner launched on a resource managed by AWS
+        Data Pipeline, the web service can use this call to detect
+        when the task runner application has failed and restart a new
+        instance.
+
+        :type taskrunner_id: string
+        :param taskrunner_id: The identifier of the task runner. This value
+            should be unique across your AWS account. In the case of AWS Data
+            Pipeline Task Runner launched on a resource managed by AWS Data
+            Pipeline, the web service provides a unique identifier when it
+            launches the application. If you have written a custom task
+            runner, you should assign a unique identifier for the task
+            runner.
+
+        :type worker_group: string
+        :param worker_group: Indicates the type of task the task runner is
+            configured to accept and process. The worker group is set as a
+            field on objects in the pipeline when they are created. You can
+            only specify a single value for `workerGroup` in the call to
+            ReportTaskRunnerHeartbeat. There are no wildcard values permitted
+            in `workerGroup`; the string must be an exact, case-sensitive
+            match.
+
+        :type hostname: string
+        :param hostname: The public DNS name of the calling task runner.
+
+        """
+        params = {'taskrunnerId': taskrunner_id, }
+        if worker_group is not None:
+            params['workerGroup'] = worker_group
+        if hostname is not None:
+            params['hostname'] = hostname
+        return self.make_request(action='ReportTaskRunnerHeartbeat',
+                                 body=json.dumps(params))
+
+    def set_status(self, object_ids, status, pipeline_id):
+        """
+        Requests that the status of an array of physical or logical
+        pipeline objects be updated in the pipeline. This update may
+        not occur immediately, but is eventually consistent. The
+        status that can be set depends on the type of object.
+
+        :type pipeline_id: string
+        :param pipeline_id: Identifies the pipeline that contains the
+            objects.
+
+        :type object_ids: list
+        :param object_ids: Identifies an array of objects. The corresponding
+            objects can be either physical or components, but not a mix of
+            both types.
+
+        :type status: string
+        :param status: Specifies the status to be set on all the objects in
+            `objectIds`. For components, this can be either `PAUSE` or
+            `RESUME`. For instances, this can be either `CANCEL`, `RERUN`, or
+            `MARK_FINISHED`.
+
+        """
+        params = {
+            'pipelineId': pipeline_id,
+            'objectIds': object_ids,
+            'status': status,
+        }
+        return self.make_request(action='SetStatus',
+                                 body=json.dumps(params))
+
+    def set_task_status(self, task_id, task_status, error_id=None,
+                        error_message=None, error_stack_trace=None):
+        """
+        Notifies AWS Data Pipeline that a task is completed and
+        provides information about the final status. The task runner
+        calls this action regardless of whether the task was
+        successful. The task runner does not need to call SetTaskStatus
+        for tasks that are canceled by the web service during a call
+        to ReportTaskProgress.
+
+        :type task_id: string
+        :param task_id: Identifies the task assigned to the task runner. This
+            value is set in the TaskObject that is returned by the
+            PollForTask action.
+
+        :type task_status: string
+        :param task_status: If `FINISHED`, the task successfully completed.
+            If `FAILED`, the task ended unsuccessfully. The `FALSE` value is
+            used by preconditions.
+
+        :type error_id: string
+        :param error_id: If an error occurred during the task, this value
+            specifies an id value that represents the error. This value is
+            set on the physical attempt object. It is used to display error
+            information to the user. It should not start with the string
+            "Service_", which is reserved by the system.
+
+        :type error_message: string
+        :param error_message: If an error occurred during the task, this
+            value specifies a text description of the error. This value is
+            set on the physical attempt object. It is used to display error
+            information to the user. The web service does not parse this
+            value.
+
+        :type error_stack_trace: string
+        :param error_stack_trace: If an error occurred during the task, this
+            value specifies the stack trace associated with the error. This
+            value is set on the physical attempt object. It is used to
+            display error information to the user. The web service does not
+            parse this value.
+
+        """
+        params = {'taskId': task_id, 'taskStatus': task_status, }
+        if error_id is not None:
+            params['errorId'] = error_id
+        if error_message is not None:
+            params['errorMessage'] = error_message
+        if error_stack_trace is not None:
+            params['errorStackTrace'] = error_stack_trace
+        return self.make_request(action='SetTaskStatus',
+                                 body=json.dumps(params))
+
+    def validate_pipeline_definition(self, pipeline_objects, pipeline_id):
+        """
+        Tests the pipeline definition with a set of validation checks
+        to ensure that it is well formed and can run without error.
+
+        :type pipeline_id: string
+        :param pipeline_id: Identifies the pipeline whose definition is to be
+            validated.
+
+        :type pipeline_objects: list
+        :param pipeline_objects: A list of objects that define the pipeline
+            changes to validate against the pipeline.
+
+        """
+        params = {
+            'pipelineId': pipeline_id,
+            'pipelineObjects': pipeline_objects,
+        }
+        return self.make_request(action='ValidatePipelineDefinition',
+                                 body=json.dumps(params))
+
+    def make_request(self, action, body):
+        headers = {
+            'X-Amz-Target': '%s.%s' % (self.TargetPrefix, action),
+            'Host': self.region.endpoint,
+            'Content-Type': 'application/x-amz-json-1.1',
+            'Content-Length': str(len(body)),
+        }
+        http_request = self.build_base_http_request(
+            method='POST', path='/', auth_path='/', params={},
+            headers=headers, data=body)
+        response = self._mexe(http_request, sender=None,
+                              override_num_retries=10)
+        response_body = response.read().decode('utf-8')
+        boto.log.debug(response_body)
+        if response.status == 200:
+            if response_body:
+                return json.loads(response_body)
+        else:
+            json_body = json.loads(response_body)
+            fault_name = json_body.get('__type', None)
+            exception_class = self._faults.get(fault_name, self.ResponseError)
+            raise exception_class(response.status, response.reason,
+                                  body=json_body)
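
The methods in this file compose into the configuration workflow the class docstring describes: create an empty pipeline, populate and validate its definition, then activate it. The sketch below is illustrative rather than part of the committed file; it assumes boto credentials are configured, and the pipeline name, unique_id, worker group, object fields, and the documented 'pipelineId' response key are example values, not guarantees. Per the create_pipeline docstring, retrying with the same name/unique_id pair returns the identifier from the first attempt instead of creating a duplicate.

from boto.datapipeline.layer1 import DataPipelineConnection

conn = DataPipelineConnection()  # defaults to datapipeline.us-east-1.amazonaws.com

# Idempotent create: repeating this call with the same name/unique_id
# returns the pipeline identifier from the previous attempt.
result = conn.create_pipeline('example-pipeline', 'example-token-001',
                              description='illustrative pipeline')
pipeline_id = result['pipelineId']  # assumes the documented response shape

# Hypothetical minimal definition; real pipelines also need schedules,
# data nodes, and activities. Note the argument order: objects first.
objects = [{'id': 'Default', 'name': 'Default',
            'fields': [{'key': 'workerGroup', 'stringValue': 'wg-example'}]}]
conn.validate_pipeline_definition(objects, pipeline_id)
conn.put_pipeline_definition(objects, pipeline_id)
conn.activate_pipeline(pipeline_id)

# To pause rather than delete (see the delete_pipeline docstring):
conn.set_status(['Default'], 'PAUSE', pipeline_id)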
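DescribeObjects, ListPipelines, and QueryObjects all share the marker/`HasMoreResults` pagination contract described in their docstrings: pass an empty marker on the first call, then feed back the returned marker while more results remain. A minimal sketch for ListPipelines, assuming the service's lowercase JSON response keys (`pipelineIdList`, `hasMoreResults`, `marker`):

pipeline_ids, marker = [], None
while True:
    page = conn.list_pipelines(marker=marker)
    pipeline_ids.extend(p['id'] for p in page.get('pipelineIdList', []))
    if not page.get('hasMoreResults'):
        break
    marker = page['marker']  # pass the returned marker into the next call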
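On the task-runner side, the protocol spelled out in poll_for_task, report_task_progress, and set_task_status is: long-poll for work, acknowledge the task within two minutes, then report a terminal status. A sketch under the same caveats: the `taskObject`/`taskId` response keys are assumptions based on the docstrings, `do_work` is a hypothetical processing function, and a production runner would also call report_task_runner_heartbeat and send periodic progress reports while working.

conn = DataPipelineConnection()
while True:
    # Long poll: the service may hold the connection for up to 90 seconds,
    # so the runner's socket timeout must be at least that long.
    response = conn.poll_for_task('wg-example', hostname='runner.example.com')
    task = response.get('taskObject')  # absent when no work was scheduled
    if not task:
        continue
    task_id = task['taskId']
    conn.report_task_progress(task_id)  # acknowledge within the 2 minute window
    try:
        do_work(task)                   # hypothetical processing step
        conn.set_task_status(task_id, 'FINISHED')
    except Exception as exc:
        conn.set_task_status(task_id, 'FAILED', error_id='ExampleError',
                             error_message=str(exc))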