Mercurial > repos > crs4 > hadoop_galaxy

diff cat_paths.xml @ 1:30bd2584b6a0 draft default tip
Uploaded
author: crs4
date: Wed, 15 Oct 2014 09:39:16 -0400
parents: 7698311d4466
--- a/cat_paths.xml	Fri May 30 06:48:47 2014 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-<tool id="hadoop_galaxy_cat_paths" name="Cat paths" version="0.1.0">
-  <description>Concatenate all components of a pathset into a single file.</description>
-  <requirements>
-    <requirement type="package" version="0.11">pydoop</requirement>
-    <requirement type="package" version="0.1.1">hadoop-galaxy</requirement>
-  </requirements>
-
-  <command>
-    #if $use_hadoop
-      dist_cat_paths
-    #else
-      cat_paths
-    #end if
-    #if $delete_source
-      --delete-source
-    #end if
-    $input_pathset $output_path
-  </command>
-
-  <inputs>
-    <param name="input_pathset" type="data" format="pathset" label="Input pathset">
-      <validator type="empty_field" />
-    </param>
-    <param name="delete_source" type="boolean" checked="false" label="Delete remote input data"
-        help="This option makes the tool move the data rather than copy it" />
-    <param name="use_hadoop" type="boolean" checked="false" label="Use Hadoop-based program"
-        help="The Galaxy workspace must be accessible by the Hadoop cluster (see help for details)" />
-  </inputs>
-
-  <outputs>
-      <!-- TODO: can we read the format from input pathset and transfer it to output? -->
-      <data name="output_path" format="data" label="Concatenated dataset $input_pathset.name" />
-  </outputs>
-
-  <stdio>
-    <exit_code range="1:" level="fatal" />
-  </stdio>
-
-  <help>
-Datasets represented as pathsets can be split in a number of files.
-This tool takes all of them and concatenates them into a single output file.
-
-In your workflow, you'll need to explicitly set the appropriate data format on the
-output dataset with an Action to "Change Datatype".
-
-"Delete remote input data" option
-====================================
-With this option, after the data has been concated into the new Galaxy dataset,
-the original files that were referenced by the pathset are deleted.  This effectively
-tells the action to "move" the data instead of a "copying" it and helps
-avoid amassing intermediate data in your Hadoop workspace.
-
-
-"Use Hadoop-based program" option
-====================================
-
-With this option you will use your entire Hadoop cluster to simultaneously write
-multiple parts of the final file.  For this to be possible, the Hadoop nodes
-must be able to access the Galaxy file space directly.  In addition, to achieve
-reasonable results the Galaxy workspace should on a parallel shared file system.
-  </help>
-</tool>
author	crs4
date	Wed, 15 Oct 2014 09:39:16 -0400
parents	7698311d4466
children