Mercurial > repos > crs4 > hadoop_galaxy
diff hadoop_galaxy-13348e73/put_dataset.xml @ 1:30bd2584b6a0 draft default tip
Uploaded
author | crs4 |
---|---|
date | Wed, 15 Oct 2014 09:39:16 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/hadoop_galaxy-13348e73/put_dataset.xml Wed Oct 15 09:39:16 2014 -0400
@@ -0,0 +1,49 @@
+<tool id="hadoop_galaxy_put_dataset" name="Put dataset" version="0.1.4">
+  <!-- Galaxy tool wrapper around the put_dataset command: takes a pathset
+       dataset as input and writes a new pathset dataset as output. -->
+  <description>Copy data from Galaxy storage to Hadoop storage.</description>
+  <requirements>
+    <requirement type="package" version="0.11">pydoop</requirement>
+    <requirement type="package" version="0.1.4">hadoop-galaxy</requirement>
+  </requirements>
+
+  <!-- Optional flags are emitted only when set. The workspace value is
+       compared via str() so the check is made on its text, and the workspace
+       flag is left out entirely when the text box is empty. -->
+  <command>
+    put_dataset
+    #if str($workspace) != ""
+      --hadoop-workspace "$workspace"
+    #end if
+    #if $use_distcp
+      --distcp
+    #end if
+    "$input_pathset" "$output_path"
+  </command>
+
+  <inputs>
+    <param name="input_pathset" type="data" format="pathset" label="Galaxy pathset" />
+
+    <param name="workspace" type="text" label="Path to workspace for Hadoop data"
+      help="The data will be copied to a new directory under this path. The value can also be set through the HADOOP_GALAXY_PUT_DIR environment variable." />
+
+    <param name="use_distcp" type="boolean" checked="false" label="Use Hadoop distcp2"
+      help="Use distcp2 if Hadoop can access Galaxy's storage space and you're copying a large dataset." />
+  </inputs>
+
+  <outputs>
+    <data name="output_path" format="pathset" label="Hadoop pathset from $input_pathset.name" />
+  </outputs>
+
+  <!-- Any exit code of 1 or greater marks the job as failed. -->
+  <stdio>
+    <exit_code range="1:" level="fatal" />
+  </stdio>
+
+  <help>
+    This tool copies data from Galaxy's storage to storage that is suitable for
+    Hadoop jobs. An example of a use case may be to copy data from the Galaxy server
+    to HDFS. Whether this tool is required depends on your specific local setup.
+  </help>
+
+</tool>