<tool id="hadoop_galaxy_put_dataset" name="Put dataset" version="0.1.4">
  <!-- Galaxy wrapper around the put_dataset executable: it takes a Galaxy
       pathset dataset as input and produces a new pathset dataset as output. -->
  <description>Copy data from Galaxy storage to Hadoop storage.</description>

  <requirements>
    <requirement type="package" version="0.11">pydoop</requirement>
    <requirement type="package" version="0.1.4">hadoop-galaxy</requirement>
  </requirements>

  <!-- Cheetah template for the command line.
       * The workspace option is emitted only when the user supplied a
         non-blank value; per the parameter help below, the location can
         otherwise come from the HADOOP_GALAXY_PUT_DIR environment variable.
         The value is cast with str() before the comparison because the
         template variable is a Galaxy parameter wrapper rather than a plain
         string, so comparing it directly to "" is not reliable.
       * The distcp flag is emitted only when the checkbox is ticked.
       Keep XML comments out of the command element itself: its text content
       is what gets rendered and executed. -->
  <command>
    put_dataset
    #if str($workspace).strip() != ""
      --hadoop-workspace "$workspace"
    #end if
    #if $use_distcp
      --distcp
    #end if
    "$input_pathset" "$output_path"
  </command>

  <inputs>
    <param name="input_pathset" type="data" format="pathset" label="Galaxy pathset" />

    <!-- Free-text destination; may be left empty (see the command template). -->
    <param name="workspace" type="text" label="Path to workspace for Hadoop data"
           help="The data will be copied to a new directory under this path. The value can also be set through the HADOOP_GALAXY_PUT_DIR environment variable." />

    <!-- Off by default. -->
    <param name="use_distcp" type="boolean" checked="false" label="Use Hadoop distcp2"
           help="Use distcp2 if Hadoop can access Galaxy's storage space and you're copying a large dataset." />
  </inputs>

  <outputs>
    <data name="output_path" format="pathset" label="Hadoop pathset from $input_pathset.name" />
  </outputs>

  <!-- Any exit status of 1 or greater marks the job as failed. -->
  <stdio>
    <exit_code range="1:" level="fatal" />
  </stdio>

  <help>
This tool copies data from Galaxy's storage to storage that is suitable for
Hadoop jobs. An example of a use case may be to copy data from the Galaxy server
to HDFS. Whether this tool is required depends on your specific local setup.
  </help>

</tool>