annotate hadoop_galaxy-13348e73/put_dataset.xml @ 1:30bd2584b6a0 draft default tip

Uploaded
author crs4
date Wed, 15 Oct 2014 09:39:16 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
1 <tool id="hadoop_galaxy_put_dataset" name="Put dataset" version="0.1.4">
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
2 <description>Copy data from Galaxy storage to Hadoop storage.</description>
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
3 <requirements>
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
4 <requirement type="package" version="0.11">pydoop</requirement>
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
5 <requirement type="package" version="0.1.4">hadoop-galaxy</requirement>
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
6 </requirements>
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
7
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
8 <command>
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
9 put_dataset
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
10 #if str($workspace) != ""
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
11 --hadoop-workspace "$workspace"
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
12 #end if
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
13 #if $use_distcp
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
14 --distcp
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
15 #end if
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
16 "$input_pathset" "$output_path"
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
17 </command>
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
18
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
19 <inputs>
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
20 <param name="input_pathset" type="data" format="pathset" label="Galaxy pathset" />
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
21
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
22 <param name="workspace" type="text" label="Path to workspace for Hadoop data"
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
23 help="The data will be copied to a new directory under this path. The value can also be set through the HADOOP_GALAXY_PUT_DIR environment variable." />
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
24
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
25 <param name="use_distcp" type="boolean" checked="false" label="Use Hadoop distcp2"
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
26 help="Use distcp2 if Hadoop can access Galaxy's storage space and you're copying a large dataset." />
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
27 </inputs>
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
28
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
29 <outputs>
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
30 <data name="output_path" format="pathset" label="Hadoop pathset from $input_pathset.name" />
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
31 </outputs>
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
32
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
33 <stdio>
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
34 <exit_code range="1:" level="fatal" />
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
35 </stdio>
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
36
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
37 <help>
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
38 This tool copies data from Galaxy's storage to storage that is suitable for
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
39 Hadoop jobs. An example of a use case may be to copy data from the Galaxy server
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
40 to HDFS. Whether this tool is required depends on your specific local setup.
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
41 </help>
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
42
30bd2584b6a0 Uploaded
crs4
parents:
diff changeset
43 </tool>