comparison humann2.xml @ 0:1ab06263e083 draft

planemo upload for repository https://github.com/ASaiM/galaxytools/tree/master/tools/humann2/ commit b46aa969c01b7e5f4f133192899fa4da286ecf89-dirty
author iuc
date Mon, 13 Mar 2017 12:39:25 -0400
parents
children 1d6d855c10d8
comparison
equal deleted inserted replaced
-1:000000000000 0:1ab06263e083
1 <tool id="humann2" name="HUMAnN2" version="@WRAPPER_VERSION@.0">
2 <description>to profile presence/absence and abundance of microbial pathways and gene families</description>
3 <macros>
4 <import>humann2_macros.xml</import>
5 </macros>
6 <expand macro="requirements">
7 <requirement type="package" version="2.3.0">bowtie2</requirement>
8 <requirement type="package" version="2.6.0">metaphlan2</requirement>
9 <requirement type="package" version="0.8.24">diamond</requirement>
10 <requirement type="package" version="2.24">rapsearch</requirement>
11 <requirement type="package" version="2.7.10">python</requirement>
12 </expand>
13 <expand macro="version"/>
<!--
  Command template (Cheetah) that prepares optional custom databases and then runs humann2.
  Steps, chained so that a failing step stops the job:
    1. Nucleotide database "history": create nucleotide_db/ and copy every file of the collection into it.
    2. Protein database "history": create protein_db/ and index the fasta with "diamond makedb" or
       "prerapsearch", depending on the selected translated_alignment software.
    3. No custom taxonomic profile and MetaPhlAn2 database "history": create ref_db/, build a bowtie2 index
       from the marker fasta and convert the json metadata with transform_json_to_pkl.py.
    4. Run humann2 on the input, writing to the "output" directory with output basename "humann2".
  The MetaPhlAn2 option string always starts with "-t rel_ab" and is passed even when a custom taxonomic
  profile is supplied; database paths are appended only in the other branch.
  Thread counts come from GALAXY_SLOTS with a fallback of 4.
  NOTE(review): the copied nucleotide files are named after $file.name; the parameter help requires specific
  file names, so confirm that this attribute carries the expected name for collection elements.
-->
14 <command detect_errors="exit_code"><![CDATA[
15 #if $nucleotide_db.nucleotide_db_selector == "history"
16 mkdir nucleotide_db
17 &&
18 #for $file in $nucleotide_db.nucleotide_database:
19 cp '$file' 'nucleotide_db/$file.name' &&
20 #end for
21 #end if
22
23 #if $protein_db.protein_db_selector == "history"
24 mkdir protein_db
25 &&
26 #if $translated_alignment == "diamond"
27 diamond makedb
28 --in '$protein_db.protein_database'
29 --db 'protein_db/protein_db'
30 --threads "\${GALAXY_SLOTS:-4}"
31 #else
32 prerapsearch
33 -d '$protein_db.protein_database'
34 -n 'protein_db/protein_db'
35 #end if
36 &&
37 #end if
38
39 #if $taxo_profile.taxonomic_profile_test == "false"
40 #if $taxo_profile.metaphlan2_db_choice.metaphlan2_db_selector == "history"
41 mkdir ref_db
42 &&
43 bowtie2-build '$taxo_profile.metaphlan2_db_choice.bowtie2db' 'ref_db/ref_db'
44 &&
45 python '$__tool_directory__/transform_json_to_pkl.py'
46 --json_input '$taxo_profile.metaphlan2_db_choice.mpa_pkl'
47 --pkl_output 'ref_db/metadata.pkl'
48 &&
49 #end if
50 #end if
51
52 humann2
53 --input '$input'
54 -o 'output'
55 $bypass.bypass_prescreen
56 $bypass.bypass_nucleotide_index
57 $bypass.bypass_translated_search
58 $bypass.bypass_nucleotide_search
59
60 #set $metaphlan_option = "-t rel_ab"
61 #if $taxo_profile.taxonomic_profile_test == "true":
62 --taxonomic-profile '$taxo_profile.taxonomic_profile'
63 #else
64 #if $taxo_profile.metaphlan2_db_choice.metaphlan2_db_selector == "history"
65 #set $metaphlan_option += " --bowtie2db " + 'ref_db/ref_db'
66 #set $metaphlan_option += " --mpa_pkl " + 'ref_db/metadata.pkl'
67 #else
68 #set $path = $taxo_profile.metaphlan2_db_choice.cached_db.fields.path
69 #set $value = $taxo_profile.metaphlan2_db_choice.cached_db.fields.value
70 #set $metaphlan_option += " --bowtie2db " + $path + "/" + $value
71 #set $metaphlan_option += " --mpa_pkl " + $path + "/" + $value + ".pkl"
72 #end if
73 #end if
74 --metaphlan-options="$metaphlan_option"
75
76 #if $nucleotide_db.nucleotide_db_selector == "cached"
77 --nucleotide-database '$nucleotide_db.nucleotide_database.fields.path'
78 #else
79 --nucleotide-database nucleotide_db
80 #end if
81
82 --translated-alignment '$translated_alignment'
83 #if $protein_db.protein_db_selector == "cached"
84 --protein-database '$protein_db.protein_database.fields.path'
85 #else
86 --protein-database protein_db
87 #end if
88 --search-mode '$search_mode'
89 --pathways '$pathways'
90
91 --annotation-gene-index $adv.annotation_gene_index
92 --evalue '$adv.evalue'
93 --threads "\${GALAXY_SLOTS:-4}"
94 --memory-use minimum
95 --prescreen-threshold '$adv.prescreen_threshold'
96 --identity-threshold '$adv.identity_threshold'
97 --translated-subject-coverage-threshold '$adv.translated_subject_coverage_threshold'
98 --translated-query-coverage-threshold '$adv.translated_query_coverage_threshold'
99 #if $adv.id_mapping
100 --id-mapping '$adv.id_mapping'
101 #end if
102 --xipe '$adv.xipe'
103 --minpath '$adv.minpath'
104 --pick-frames '$adv.pick_frames'
105 --gap-fill '$adv.gap_fill'
106 --output-format '$adv.output_format'
107 --output-max-decimals '$adv.output_max_dec'
108 --output-basename 'humann2'
109 $adv.remove_statified_output
110 $adv.remove_column_description_output
111 ]]></command>
112 <inputs>
<!-- Input dataset, followed by the switches that let HUMAnN2 skip individual steps.
     Each boolean renders its own command-line flag when checked and an empty string otherwise;
     the command template inserts the rendered value verbatim, so truevalue must equal the flag. -->
113 <param argument="--input" type="data" format="fastq,fasta,sam,bam,biom1" label="Input sequence file"/>
114 <section name="bypass" title="Options to bypass some steps" expanded="False">
115   <param name="bypass_prescreen" argument="--bypass-prescreen" type="boolean" checked="false" truevalue="--bypass-prescreen" falsevalue="" label="Bypass the prescreen step and run on the full ChocoPhlAn database?"/>
116   <param name="bypass_nucleotide_index" argument="--bypass-nucleotide-index" type="boolean" checked="false" truevalue="--bypass-nucleotide-index" falsevalue="" label="Bypass the nucleotide index step and run on the indexed ChocoPhlAn database?"/>
117   <param name="bypass_translated_search" argument="--bypass-translated-search" type="boolean" checked="false" truevalue="--bypass-translated-search" falsevalue="" label="Bypass the translated search step?"/>
<!-- Fixed: this switch used to emit the translated-search flag (copy-paste slip), so checking it
     skipped the wrong step. -->
118   <param name="bypass_nucleotide_search" argument="--bypass-nucleotide-search" type="boolean" checked="false" truevalue="--bypass-nucleotide-search" falsevalue="" label="Bypass the nucleotide search steps?"/>
119 </section>
<!-- Source of the taxonomic profile: either a profile file supplied by the user, or a MetaPhlAn2
     marker database taken from a data table ("cached") or from the history (fasta plus json metadata). -->
120 <conditional name="taxo_profile">
121   <param name="taxonomic_profile_test" type="select" help="The file must have been created by MetaPhlan2" label="Use a custom taxonomic profile?">
122     <option value="true">Yes</option>
123     <option value="false" selected="true">No</option>
124   </param>
125   <when value="true">
126     <param name="taxonomic_profile" argument="--taxonomic-profile" type="data" label="Taxonomic profile file" format="tabular,txt"/>
127   </when>
128   <when value="false">
129     <conditional name="metaphlan2_db_choice">
130       <param name="metaphlan2_db_selector" label="Database with clade-specific marker genes" type="select">
131         <option value="cached" selected="true">Default MetaPhlAn2 database</option>
132         <option value="history">From history</option>
133       </param>
134       <when value="cached">
135         <param name="cached_db" type="select" label="Cached database with clade-specific marker genes">
136           <options from_data_table="metaphlan2_database"/>
137         </param>
138       </when>
139       <when value="history">
140         <param argument="--bowtie2db" format="fasta" type="data" label="Database with clade-specific marker genes from history"/>
141         <param argument="--mpa_pkl" format="json" type="data" label="Metadata associate to the database with clade-specific marker genes from history"/>
142       </when>
143     </conditional>
144   </when>
145 </conditional>
<!-- Nucleotide database: an entry of the humann2_nucleotide_database data table, or a list
     collection of fasta files from the history. -->
146 <conditional name="nucleotide_db">
147   <param name="nucleotide_db_selector" type="select" help="For locally cached databases, you need first to execute the tool to download HUMAnN2 databases" label="Nucleotide database">
148     <option value="cached" selected="true">Locally cached</option>
149     <option value="history">From history (as collection)</option>
150   </param>
151   <when value="cached">
152     <param name="nucleotide_database" label="Nucleotide database" type="select">
153       <options from_data_table="humann2_nucleotide_database"/>
154     </param>
155   </when>
156   <when value="history">
157     <param name="nucleotide_database" argument="--nucleotide-database" type="data_collection" collection_type="list" format="fasta" label="Nucleotide database from history" help="Each file must be named: ^[g__].[s__]"/>
158   </when>
159 </conditional>
<!-- Translated-search software, and the protein database it searches: an entry of the
     humann2_protein_database data table, or a single fasta dataset from the history. -->
160 <param name="translated_alignment" argument="--translated-alignment" label="Software to use for translated alignment" type="select">
161   <option value="rapsearch">Rapsearch</option>
162   <option value="diamond" selected="true">Diamond</option>
163 </param>
164 <conditional name="protein_db">
165   <param name="protein_db_selector" type="select" help="For locally cached databases, you need first to execute the tool to download HUMAnN2 databases" label="Protein database">
166     <option value="cached" selected="true">Locally cached</option>
167     <option value="history">From history</option>
168   </param>
169   <when value="cached">
170     <param name="protein_database" label="Protein database" type="select">
171       <options from_data_table="humann2_protein_database"/>
172     </param>
173   </when>
174   <when value="history">
175     <param name="protein_database" argument="--protein-database" format="fasta" type="data" label="Protein database from history"/>
176   </when>
177 </conditional>
<!-- Gene-family search mode and pathway database; both values are passed to humann2 unchanged. -->
178 <param name="search_mode" argument="--search-mode" label="Search for uniref50 or uniref90 gene families?" type="select">
179   <option value="uniref50" selected="true">uniref50</option>
180   <option value="uniref90">uniref90</option>
181 </param>
182 <param name="pathways" help="(--pathways)" label="Database to use for pathway computations" type="select">
183   <option value="metacyc" selected="true">MetaCyc</option>
184   <option value="unipathway">UniPathway</option>
185 </param>
186
<!-- Advanced HUMAnN2 settings. Thresholds and on/off switches are passed to humann2 as option values;
     the last two booleans render a bare flag when checked and nothing otherwise.
     The parameter name "remove_statified_output" is spelled this way on purpose: the command template
     and the tests refer to it under that name, so it must not be renamed here alone. -->
187 <section name="adv" title="Advanced Options" expanded="False">
188   <param name="annotation_gene_index" argument="--annotation-gene-index" type="integer" value="8" label="Index of the gene in the sequence annotation"/>
189   <param argument="--evalue" type="float" value="1" label="E-value threshold to use with the translated search"/>
190   <param name="prescreen_threshold" argument="--prescreen-threshold" type="float" value="0.01" label="Minimum percentage of reads matching a species"/>
191   <param name="identity_threshold" argument="--identity-threshold" type="float" value="50" label="Identity threshold for alignments"/>
192   <param name="translated_subject_coverage_threshold" argument="--translated-subject-coverage-threshold" type="float" value="50" label="Subject coverage threshold for translated alignments"/>
<!-- Fixed label: this is the query coverage threshold; the label was a copy of the subject one. -->
193   <param name="translated_query_coverage_threshold" argument="--translated-query-coverage-threshold" type="float" value="50" label="Query coverage threshold for translated alignments"/>
194   <param name="id_mapping" argument="--id-mapping" type="data" format="tsv" label="id mapping file for alignments" optional="True"/>
195   <param argument="--xipe" type="boolean" checked="false" truevalue="on" falsevalue="off" label="Use xipe computation?"/>
196   <param argument="--minpath" type="boolean" checked="true" truevalue="on" falsevalue="off" label="Use minpath computation?"/>
197   <param name="pick_frames" type="boolean" checked="true" truevalue="on" falsevalue="off" label="Use pick frames computation?" argument="--pick-frames"/>
198   <param name="gap_fill" type="boolean" checked="true" truevalue="on" falsevalue="off" label="Use gap fill computation?" argument="--gap-fill"/>
199   <param name="output_format" type="select" label="Format of the output files" argument="--output-format">
200     <option value="tsv" selected="true">TSV</option>
201     <option value="biom">BIOM</option>
202   </param>
203   <param name="output_max_dec" type="integer" value="10" label="Number of decimals to output" argument="--output-max-decimals"/>
204   <param name="remove_statified_output" type="boolean" checked="false" truevalue="--remove-stratified-output" falsevalue="" label="Remove stratification from output?" argument="--remove-stratified-output"/>
<!-- Fixed label: it duplicated the stratification label although the flag removes the column description. -->
205   <param name="remove_column_description_output" type="boolean" checked="false" truevalue="--remove-column-description-output" falsevalue="" label="Remove column description from output?" argument="--remove-column-description-output"/>
206
207 </section>
208 </inputs>
<!-- Three result tables (gene families, pathway coverage, pathway abundance), each declared once per
     output format; the filter keeps only the variant matching the format chosen in the advanced options.
     Files are collected from the "output" directory written by humann2. -->
209 <outputs>
210   <data name="gene_families_tsv" format="tsv" label="${tool.name} on ${on_string}: Gene families and their abundance" from_work_dir="output/humann2_genefamilies.tsv">
211     <filter>adv['output_format'] == "tsv"</filter>
212   </data>
213   <data name="gene_families_biom" format="biom1" label="${tool.name} on ${on_string}: Gene families and their abundance" from_work_dir="output/humann2_genefamilies.biom">
214     <filter>adv['output_format'] == "biom"</filter>
215   </data>
216   <data name="pathcoverage_tsv" format="tsv" label="${tool.name} on ${on_string}: Pathways and their coverage" from_work_dir="output/humann2_pathcoverage.tsv">
217     <filter>adv['output_format'] == "tsv"</filter>
218   </data>
219   <data name="pathcoverage_biom" format="biom1" label="${tool.name} on ${on_string}: Pathways and their coverage" from_work_dir="output/humann2_pathcoverage.biom">
220     <filter>adv['output_format'] == "biom"</filter>
221   </data>
222   <data name="pathabundance_tsv" format="tsv" label="${tool.name} on ${on_string}: Pathways and their abundance" from_work_dir="output/humann2_pathabundance.tsv">
223     <filter>adv['output_format'] == "tsv"</filter>
224   </data>
225   <data name="pathabundance_biom" format="biom1" label="${tool.name} on ${on_string}: Pathways and their abundance" from_work_dir="output/humann2_pathabundance.biom">
226     <filter>adv['output_format'] == "biom"</filter>
227   </data>
228 </outputs>
229 <tests>
<!-- Test 1: databases from history, diamond as translated aligner, user-supplied taxonomic profile,
     uniref50 / MetaCyc, TSV output. Checks selected gene-family rows and the UNMAPPED / UNINTEGRATED
     rows of both pathway tables. -->
230 <test>
231   <param name="input" value="input_sequences.fasta"/>
232   <param name="bypass_prescreen" value=""/>
233   <param name="bypass_nucleotide_index" value=""/>
234   <param name="bypass_translated_search" value=""/>
235   <param name="bypass_nucleotide_search" value=""/>
236   <param name="nucleotide_db_selector" value="history"/>
237   <param name="nucleotide_database">
238     <collection type="list">
239       <element name="g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1" ftype="fasta" value="g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1.ffn.gz"/>
240       <element name="g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1" ftype="fasta" value="g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1.ffn.gz"/>
241     </collection>
242   </param>
243   <param name="annotation_gene_index" value="8"/>
244   <param name="protein_db_selector" value="history"/>
245   <param name="protein_database" value="reduced_uniref50.fasta"/>
246   <param name="evalue" value="1"/>
247   <param name="search_mode" value="uniref50"/>
248   <param name="prescreen_threshold" value="0.01"/>
249   <param name="identity_threshold" value="50"/>
250   <param name="translated_subject_coverage_threshold" value="50"/>
251   <param name="translated_query_coverage_threshold" value="50"/>
252   <param name="taxonomic_profile_test" value="true"/>
253   <param name="taxonomic_profile" value="taxonomic_profile.tabular"/>
254   <param name="translated_alignment" value="diamond"/>
255   <param name="xipe" value="off"/>
256   <param name="minpath" value="on"/>
257   <param name="pick_frames" value="on"/>
258   <param name="gap_fill" value="off"/>
259   <param name="output_format" value="tsv"/>
260   <param name="output_max_dec" value="10"/>
261   <param name="remove_statified_output" value=""/>
262   <param name="remove_column_description_output" value=""/>
263   <param name="pathways" value="metacyc"/>
264   <output name="gene_families_tsv">
265     <assert_contents>
266       <has_text text="UniRef50_R5C4D7|g__Bacteroides.s__Bacteroides_thetaiotaomicron"/>
267       <has_text text="UniRef50_R5NYX5: Sodium ion-translocating decarboxylase beta subunit|g__Bacteroides.s__Bacteroides_thetaiotaomicron"/>
268       <has_text text="UniRef50_A0A016FAQ7: N-6 DNA Methylase family protein (Fragment)|g__Bacteroides.s__Bacteroides_stercoris"/>
269     </assert_contents>
270   </output>
271   <output name="pathcoverage_tsv">
272     <assert_contents>
273       <has_text text="UNMAPPED"/>
274       <has_text text="UNINTEGRATED"/>
275     </assert_contents>
276   </output>
277   <output name="pathabundance_tsv">
278     <assert_contents>
279       <has_text text="UNMAPPED"/>
280       <has_text text="UNINTEGRATED"/>
281     </assert_contents>
282   </output>
283 </test>
<!-- Test 2: databases from history, rapsearch as translated aligner, MetaPhlAn2 marker database built
     from history datasets (fasta plus json), uniref90 / UniPathway, TSV output. Checks the header and
     UNMAPPED row of the gene-family table and the UNMAPPED / UNINTEGRATED rows of both pathway tables. -->
284 <test>
285   <param name="input" value="input_sequences.fasta"/>
286   <param name="bypass_prescreen" value=""/>
287   <param name="bypass_nucleotide_index" value=""/>
288   <param name="bypass_translated_search" value=""/>
289   <param name="bypass_nucleotide_search" value=""/>
290   <param name="nucleotide_db_selector" value="history"/>
291   <param name="nucleotide_database">
292     <collection type="list">
293       <element name="g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1" ftype="fasta" value="g__Bacteroides.s__Bacteroides_stercoris.centroids.v0.1.1.ffn.gz"/>
294       <element name="g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1" ftype="fasta" value="g__Bacteroides.s__Bacteroides_thetaiotaomicron.centroids.v0.1.1.ffn.gz"/>
295     </collection>
296   </param>
297   <param name="annotation_gene_index" value="8"/>
298   <param name="protein_db_selector" value="history"/>
299   <param name="protein_database" value="reduced_uniref50.fasta"/>
300   <param name="evalue" value="1"/>
301   <param name="search_mode" value="uniref90"/>
302   <param name="prescreen_threshold" value="0.01"/>
303   <param name="identity_threshold" value="50"/>
304   <param name="translated_subject_coverage_threshold" value="50"/>
305   <param name="translated_query_coverage_threshold" value="50"/>
306   <param name="taxonomic_profile_test" value="false"/>
307   <param name="metaphlan2_db_selector" value="history"/>
308   <param name="bowtie2db" value="marker_sequences.fasta"/>
309   <param name="mpa_pkl" value="marker_metadata.json"/>
310   <param name="translated_alignment" value="rapsearch"/>
311   <param name="xipe" value="off"/>
312   <param name="minpath" value="on"/>
313   <param name="pick_frames" value="on"/>
314   <param name="gap_fill" value="off"/>
315   <param name="output_format" value="tsv"/>
316   <param name="output_max_dec" value="10"/>
317   <param name="remove_statified_output" value=""/>
318   <param name="remove_column_description_output" value=""/>
319   <param name="pathways" value="unipathway"/>
320   <output name="gene_families_tsv">
321     <assert_contents>
322       <has_text text="UNMAPPED"/>
323       <has_text text="# Gene Family"/>
324     </assert_contents>
325   </output>
326   <output name="pathcoverage_tsv">
327     <assert_contents>
328       <has_text text="UNMAPPED"/>
329       <has_text text="UNINTEGRATED"/>
330     </assert_contents>
331   </output>
332   <output name="pathabundance_tsv">
333     <assert_contents>
334       <has_text text="UNMAPPED"/>
335       <has_text text="UNINTEGRATED"/>
336     </assert_contents>
337   </output>
338 </test>
339 </tests>
340 <help><![CDATA[
341 @HELP_HEADER@
342
343 This tool corresponds to the main tool in the HUMAnN2 pipeline.
344
345 **Inputs**
346
347 The input is a single file corresponding either to a filtered shotgun sequencing metagenome file (fastq, fastq.gz, fasta, or fasta.gz format), an alignment file (sam, bam or blastm8 format) or a gene table file (tsv or biom format).
348
349 A file with a taxonomic profile (obtained with MetaPhlAn2) can also be provided to skip the first step of taxonomic profiling, which is needed to select pangenomes in the protein database. Otherwise, the default MetaPhlAn2 database or a custom database can be used for taxonomic profiling. For a custom database, a fasta file with the marker gene sequences is required, together with a json file containing the metadata in the following structure:
350
351 ::
352
353 {
354 "taxonomy": {
355 "taxonomy of genome1": genome1_length,
356 "taxonomy of genome2": genome2_length,
357 ...
358 },
359 "markers": {
360 "marker1_name": {
361 "clade": the clade that the marker belongs to,
362 "ext": [list of external genomes where the marker appears],
363 "len": length of the marker,
364 "score": score of the marker,
365 "taxon": the taxon of the marker
366 }
367 ...
368 }
369 }
370
371 For functional profiling, HUMAnN2 uses multiple databases. Locally cached nucleotide or protein databases have to be downloaded before they can be used (with the dedicated tool). Custom databases can also be used after upload. A custom nucleotide database has to be provided as a dataset collection (list) of fasta files, and a custom protein database as a single fasta file.
372
373 **Outputs**
374
375 HUMAnN2 creates three output files:
376
377 - A file with gene families and their abundance
378 - A file with pathways and their abundance
379 - A file with pathways and their coverage
380
381 ]]></help>
382 <expand macro="citations"/>
383 </tool>