comparison metaphlan2.xml @ 0:d2448d2bf1f8 draft

planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/metaphlan2/ commit 450bf3326f301c344103272b0d761e8625ce0c44-dirty
author bebatut
date Wed, 01 Jun 2016 10:45:26 -0400
parents
children 8991e05c44e4
comparison
equal deleted inserted replaced
-1:000000000000 0:d2448d2bf1f8
1 <tool id="metaphlan2" name="MetaPhlAn2" version="2.5.0">
2
<!-- Short summary of what the tool does. -->
<description>to profile the composition of microbial communities</description>

<!-- The "requirements" and "stdio" macros expanded below are presumably defined in the imported file. -->
<macros>
    <import>metaphlan2_macros.xml</import>
</macros>

<expand macro="requirements"/>
<expand macro="stdio"/>

<!-- Command used to report the version of the underlying metaphlan2.py script. -->
<version_command>metaphlan2.py -v</version_command>
17
<command>
<![CDATA[
    ## Abort early (exit code 200) when bowtie2 cannot be found on the PATH.
    (which bowtie2 || exit 200)

    &&

    ## Database taken from the history: build a bowtie2 index from the marker
    ## sequences and convert the JSON metadata into the pickle file that is
    ## later handed to --mpa_pkl.
    #if $db.db_selector == "history"
        mkdir ref_db
        &&
        bowtie2-build '$db.db_sequences' ref_db/ref_db
        &&
        python '$__tool_directory__/transform_json_to_pkl.py'
            ## db_metadata is declared inside the "db" conditional, so it is
            ## referenced through it, exactly like db_sequences above.
            --json_input '$db.db_metadata'
            --pkl_output ref_db/metadata.pkl
        &&
    #end if

    ## Dataset paths are single-quoted so that unusual characters in a path
    ## cannot split or alter the shell command.
    metaphlan2.py
        '$input_file'
        -o '$output_file'
        --input_type ${input_file.datatype.file_ext}

        --bowtie2_exe `which bowtie2`

        #if $db.db_selector == "cached"
            ## Map the first column of every data table row to its third
            ## column, then look up the entry selected by the user.
            #set $table = dict([(_[0], _[2]) for _ in $db.cached_db.input.options.tool_data_table.data])
            #set $db_choice = $db.cached_db.value
            --bowtie2db '${table[$db_choice]}'
            --mpa_pkl '${table[$db_choice]}.pkl'
        #else
            --bowtie2db ref_db/ref_db
            --mpa_pkl ref_db/metadata.pkl
        #end if

        --no_map

        -t $analysis_type.analysis_type_select
        ## Only three analysis types take an extra option.
        #if $analysis_type.analysis_type_select == "rel_ab"
            --tax_lev $analysis_type.taxonomic_level
        #else if $analysis_type.analysis_type_select == "marker_ab_table"
            --nreads $analysis_type.nreads
        #else if $analysis_type.analysis_type_select == "marker_pres_table"
            --pres_th $analysis_type.pres_th
        #end if

        --min_cu_len $min_cu_len
        --min_alignment_len $min_alignment_len

        ## Each boolean expands either to nothing or to its --ignore_* flag.
        $ignore_viruses
        $ignore_eukaryotes
        $ignore_bacteria
        $ignore_archaea

        --stat_q $stat_q
        -s '$sam_output_file'
]]>
</command>
75
<inputs>
    <!-- Sequences to profile; the datatype extension of this dataset is passed to "input_type" in the command. -->
    <param name="input_file" type="data" format="fastq,fasta,sam" label="Input file" help=""/>

    <!-- Marker database: either an entry of the "metaphlan2_db" data table or two datasets from the history. -->
    <conditional name="db">
        <param name="db_selector" type="select" label="Database with clade-specific marker genes" help="">
            <option value="cached" selected="true">Locally cached</option>
            <option value="history">From history</option>
        </param>

        <when value="cached">
            <param name="cached_db" type="select" label="Cached database with clade-specific marker genes">
                <options from_data_table="metaphlan2_db"/>
            </param>
        </when>
        <when value="history">
            <param name="db_sequences" type="data" format="fasta" label="Database with clade-specific marker genes from history" help="(--bowtie2db)"/>
            <param name="db_metadata" type="data" format="json" label="Metadata associated with the database with clade-specific marker genes from history" help="(--mpa_pkl)"/>
        </when>
    </conditional>

    <!-- Analysis type given to "-t"; three of the types expose one extra parameter. -->
    <conditional name="analysis_type">
        <param name="analysis_type_select" type="select" label="Type of analysis to perform" help="(-t)">
            <option value="rel_ab" selected="true">Profiling a metagenome in terms of relative abundances</option>
            <option value="reads_map">Mapping from reads to clades (only reads hitting a marker)</option>
            <option value="clade_profiles">Normalized marker counts for clades with at least a non-null marker</option>
            <option value="marker_ab_table">Normalized marker counts (only when &gt; 0.0 and normalized by metagenome size if --nreads is specified)</option>
            <option value="marker_counts">Non-normalized marker counts (use with extreme caution)</option>
            <option value="marker_pres_table">List of markers present in the sample (threshold at 1.0 if not differently specified with --pres_th)</option>
        </param>

        <when value="rel_ab">
            <param name="taxonomic_level" type="select" label="Taxonomic level for the relative abundance output" help="(--tax_lev)">
                <option value="a" selected="true">All taxonomic levels</option>
                <option value="k">Kingdoms (Bacteria and Archaea) only</option>
                <option value="p">Phyla only</option>
                <option value="c">Classes only</option>
                <option value="o">Orders only</option>
                <option value="f">Families only</option>
                <option value="g">Genera only</option>
                <option value="s">Species only</option>
            </param>
        </when>

        <when value="reads_map"/>
        <when value="clade_profiles"/>

        <when value="marker_ab_table">
            <!-- NOTE(review): the command always passes this value (default 0) to "nreads", while the help text says normalization is skipped when the value is not specified; confirm how 0 is interpreted. -->
            <param name="nreads" type="integer" value="0" label="Total number of reads in the original metagenome" help="It is used for normalizing the length-normalized counts with the metagenome size as well. No normalization applied if the value is not specified"/>
        </when>

        <when value="marker_counts"/>

        <when value="marker_pres_table">
            <!-- NOTE(review): the default here is 0, whereas the option text above mentions a threshold of 1.0 when "pres_th" is not given; confirm the intended default. -->
            <param name="pres_th" type="integer" value="0" label="Threshold for calling a marker present" help=""/>
        </when>
    </conditional>

    <param name="min_cu_len" type="integer" value="2000" label="Minimum total nucleotide length for the markers in a clade for estimating the abundance without considering sub-clade abundances" help=""/>

    <param name="min_alignment_len" type="integer" value="0" label="Sam records for aligned reads with the longest subalignment length smaller than this threshold will be discarded." help=""/>

    <!-- The four switches below are inverted: checked (the default) adds nothing to the command line, unchecked adds the matching ignore flag. -->
    <param name="ignore_viruses" type="boolean" checked="true" truevalue="" falsevalue="--ignore_viruses" label="Profile viral organisms?" help=""/>
    <param name="ignore_eukaryotes" type="boolean" checked="true" truevalue="" falsevalue="--ignore_eukaryotes" label="Profile eukaryotic organisms?" help=""/>

    <param name="ignore_bacteria" type="boolean" checked="true" truevalue="" falsevalue="--ignore_bacteria" label="Profile bacterial organisms?" help=""/>

    <param name="ignore_archaea" type="boolean" checked="true" truevalue="" falsevalue="--ignore_archaea" label="Profile archaeal organisms?" help=""/>

    <param name="stat_q" type="float" value="0.1" label="Quantile value for the robust average" help=""/>
</inputs>
146
<outputs>
    <!-- Profile table; the command passes this dataset to "-o". -->
    <data name="output_file" format="tabular" label="${tool.name} on ${on_string}: Community profile"/>
    <!-- SAM output; the command passes this dataset to "-s". -->
    <data name="sam_output_file" format="sam" label="${tool.name} on ${on_string}: Sam file"/>
</outputs>
151
<tests>
    <!-- Relative-abundance profiling (all taxonomic levels) against a marker database supplied from the history. -->
    <test>
        <param name="input_file" value="input_sequences.fasta"/>

        <!-- Database selection and its two history datasets. -->
        <param name="db_selector" value="history"/>
        <param name="db_metadata" value="marker_metadata.json"/>
        <param name="db_sequences" value="marker_sequences.fasta"/>

        <!-- Analysis type and its extra parameter. -->
        <param name="analysis_type_select" value="rel_ab"/>
        <param name="taxonomic_level" value="a"/>

        <param name="min_cu_len" value="2000"/>
        <param name="min_alignment_len" value="0"/>

        <!-- NOTE(review): the four boolean parameters are given an empty value; confirm which state (checked or unchecked) the test framework resolves it to. -->
        <param name="ignore_viruses" value=""/>
        <param name="ignore_eukaryotes" value=""/>
        <param name="ignore_bacteria" value=""/>
        <param name="ignore_archaea" value=""/>

        <param name="stat_q" value="0.1"/>

        <!-- Expected content of the profile output. -->
        <output name="output_file" file="community_profile.tabular"/>
    </test>
</tests>
170
171 <help><![CDATA[
172 **What it does**
173
174 MetaPhlAn is a computational tool for profiling the composition of microbial communities (Bacteria, Archaea, Eukaryotes and Viruses) from metagenomic shotgun sequencing data with species level resolution. For more information, check the `user manual <https://bitbucket.org/biobakery/metaphlan2/>`_.
175
176 **Inputs**
177
178 MetaPhlAn2 takes as input a sequence file in FASTA or FASTQ format, or a SAM file produced by Bowtie2.
179
180 It is also possible to use a custom database with clade-specific marker genes. In this case, two files are required: a FASTA file with the marker gene sequences and a file containing the associated metadata. The metadata file is a JSON file with the following structure:
181
182 ::
183
184 {
185 "taxonomy": {
186 "taxonomy of genome1": genome1_length,
187 "taxonomy of genome2": genome2_length,
188 ...
189 },
190 "markers": {
191 "marker1_name": {
192 "clade": the clade that the marker belongs to,
193 "ext": [list of external genomes where the marker appears],
194 "len": length of the marker,
195 "score": score of the marker,
196 "taxon": the taxon of the marker
197 },
198 ...
199 }
200 }
201
202 The marker names correspond to the sequence names in the corresponding FASTA file with marker gene sequences.
203
204 **Outputs**
205
206 The main output file is a tab-separated output file of the predicted taxon relative abundances.
207
208 ]]></help>
209
<citations>
    <!-- Publication referenced for this tool, identified by its DOI. -->
    <citation type="doi">10.1038/nmeth.3589</citation>
</citations>
213 </tool>