comparison humann.xml @ 0:ab86614989fd draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
author thanhlv
date Mon, 13 Feb 2023 16:16:49 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:ab86614989fd
1 <tool id="humann3" name="HUMAnN3" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
2 <description>to profile presence/absence and abundance of microbial pathways and gene families</description>
3 <macros>
4 <import>macros.xml</import>
5 <xml name="prescreen"> <!-- MetaPhlAn prescreen inputs; expanded by the "bypass_translated_search" and "none" workflow branches -->
6 <section name="prescreen" expanded="true" title="Prescreen / Identifying community species">
7 <conditional name="metaphlan_db">
8 <param name="selector" type="select" label="Database with clade-specific marker genes">
9 <option value="cached" selected="true">Locally cached</option>
10 <option value="history">From history</option>
11 </param>
12 <when value="cached"> <!-- choose an entry of the metaphlan_database data table -->
13 <param name="cached_db" type="select" label="Cached database with clade-specific marker genes">
14 <options from_data_table="metaphlan_database">
15 <validator type="no_options" message="No MetaPhlAn database is available"/>
16 </options>
17 </param>
18 </when>
19 <when value="history"> <!-- marker FASTA plus JSON metadata; both are processed by @PRESCREEN_PREPARE@ -->
20 <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/>
21 <param argument="--mpa_pkl" type="data" format="json" label="Metadata associated to the database with clade-specific marker genes from history"/>
22 </when>
23 </conditional>
24 <param argument="--prescreen-threshold" type="float" min="0" max="100" value="0.01" label="Minimum percentage of reads matching a species"/>
25 <!-- TODO: expose additional MetaPhlAn options -->
26 </section>
27 </xml>
28 <!-- Shell prologue for a history-supplied MetaPhlAn database: builds a Bowtie2 index from the FASTA and runs customizemetadata.py transform_json_to_pkl on the JSON metadata, both written under metaphlan_db/ --> <token name="@PRESCREEN_PREPARE@"><![CDATA[
29 #if $wf.prescreen.metaphlan_db.selector == 'history'
30 #set $custom_index = 'metaphlan_db/custom_db-v30'
31 mkdir metaphlan_db &&
32 bowtie2-build --large-index '$wf.prescreen.metaphlan_db.bowtie2db' '$custom_index'
33 &&
34 python '$__tool_directory__/customizemetadata.py'
35 transform_json_to_pkl
36 --json '$wf.prescreen.metaphlan_db.mpa_pkl'
37 --pkl '${custom_index}.pkl'
38 &&
39 #end if
40 ]]></token>
41 <!-- humann arguments for the prescreen step: points MetaPhlAn at either the index built in metaphlan_db/ or the cached data table entry --> <token name="@PRESCREEN_RUN@"><![CDATA[
42 #if $wf.prescreen.metaphlan_db.selector == 'history'
43 #set $mpa_dir = 'metaphlan_db/'
44 #set $mpa_index = 'custom_db-v30'
45 #else
46 #set $mpa_dir = $wf.prescreen.metaphlan_db.cached_db.fields.path
47 #set $mpa_index = $wf.prescreen.metaphlan_db.cached_db.fields.dbkey
48 #end if
49 #set $metaphlan_option = "-t rel_ab --bowtie2db %s --index %s" % ($mpa_dir, $mpa_index)
50 --metaphlan-options="$metaphlan_option"
51 --prescreen-threshold $wf.prescreen.prescreen_threshold
52 ]]></token>
53 <xml name="nucleotide_database"> <!-- History-supplied nucleotide database (list collection of FASTA files); expanded by the "history" branch of the nucleotide_search macro -->
54 <param argument="--nucleotide-database" type="data_collection" collection_type="list" format="fasta" label="Nucleotide database from history" help="Each file must be named: ^[g__].[s__]"/>
55 </xml>
56 <xml name="nucleotide_search"> <!-- Nucleotide search inputs: database source plus the three alignment thresholds consumed by @NUCLEOTIDE_SEARCH_RUN@ -->
57 <section name="nucleotide_search" title="Nucleotide search / Mapping reads to community pangenomes" expanded="true">
58 <conditional name="nucleotide_db">
59 <param name="selector" type="select" label="Nucleotide database" help="For locally cached databases, you first need to run the tool that downloads the HUMAnN databases">
60 <option value="cached" selected="true">Locally cached</option>
61 <option value="history">From history</option>
62 </param>
63 <when value="cached"> <!-- choose an entry of the humann_nucleotide_database data table -->
64 <param name="nucleotide_database" type="select" label="Nucleotide database">
65 <options from_data_table="humann_nucleotide_database">
66 <validator type="no_options" message="No nucleotide database is available"/>
67 </options>
68 </param>
69 </when>
70 <when value="history">
71 <expand macro="nucleotide_database"/>
72 </when>
73 </conditional>
74 <!-- TODO: expose additional Bowtie2 options -->
75 <param argument="--nucleotide-identity-threshold" type="float" value="0" min="0" max="100"
76 label="Identity threshold for nucleotide alignments"/>
77 <param argument="--nucleotide-subject-coverage-threshold" type="float" value="50" min="0" max="100"
78 label="Subject coverage threshold for nucleotide alignments"/>
79 <param argument="--nucleotide-query-coverage-threshold" type="float" value="90" min="0" max="100"
80 label="Query coverage threshold for nucleotide alignments"/>
81 </section>
82 </xml>
83 <!-- Shell prologue for a history-supplied nucleotide database: symlinks every collection element into nucleotide_db/ under a sanitized name; relies on the "#import re" of the command block --> <token name="@NUCLEOTIDE_SEARCH_PREPARE@"><![CDATA[
84 #if $wf.nucleotide_search.nucleotide_db.selector == 'history'
85 mkdir nucleotide_db &&
86 #for $db_file in $wf.nucleotide_search.nucleotide_db.nucleotide_database
87 #set $link_name = re.sub('[^\w\-_.]', '_', $db_file.element_identifier)
88 ln -s '$db_file' 'nucleotide_db/${link_name}.v201901_v31' &&
89 #end for
90 #end if
91 ]]></token>
92 <!-- humann arguments for the nucleotide search: database location (cached path or the nucleotide_db directory) and the three thresholds --> <token name="@NUCLEOTIDE_SEARCH_RUN@"><![CDATA[
93 #if $wf.nucleotide_search.nucleotide_db.selector == 'cached'
94 --nucleotide-database '$wf.nucleotide_search.nucleotide_db.nucleotide_database.fields.path'
95 #else
96 --nucleotide-database nucleotide_db
97 #end if
98 --nucleotide-identity-threshold $wf.nucleotide_search.nucleotide_identity_threshold
99 --nucleotide-subject-coverage-threshold $wf.nucleotide_search.nucleotide_subject_coverage_threshold
100 --nucleotide-query-coverage-threshold $wf.nucleotide_search.nucleotide_query_coverage_threshold
101 ]]></token>
102 <xml name="translated_search"> <!-- Translated search inputs: protein database source, search mode, e-value and alignment thresholds consumed by @TRANSLATED_SEARCH_RUN@ -->
103 <section name="translated_search" title="Translated search / Aligning unmapped reads to a protein database" expanded="true">
104 <conditional name="protein_db">
105 <param name="selector" type="select" label="Protein database" help="For locally cached databases, you first need to run the tool that downloads the HUMAnN databases">
106 <option value="cached" selected="true">Locally cached</option>
107 <option value="history">From history</option>
108 </param>
109 <when value="cached"> <!-- choose an entry of the humann_protein_database data table -->
110 <param name="protein_database" type="select" label="Protein database">
111 <options from_data_table="humann_protein_database">
112 <validator type="no_options" message="No protein database is available"/>
113 </options>
114 </param>
115 </when>
116 <when value="history"> <!-- a FASTA file from the history needs an explicit search mode -->
117 <param argument="--protein-database" type="data" format="fasta" label="Protein database from history"/>
118 <param argument="--search-mode" type="select" label="Search mode">
119 <option value="uniref50">UniRef50</option>
120 <option value="uniref90" selected="true">UniRef90</option>
121 </param>
122 </when>
123 </conditional>
124 <param argument="--evalue" type="float" value="1" label="E-value threshold to use with the translated search"/>
125 <!-- TODO: expose additional DIAMOND options -->
126 <param argument="--translated-identity-threshold" type="float" min="0" max="100" optional="true"
127 label="Identity threshold for translated alignments"
128 help="It is tuned automatically (based on uniref mode) unless a custom value is specified"/>
129 <param argument="--translated-subject-coverage-threshold" type="float" value="50" min="0" max="100"
130 label="Subject coverage threshold for translated alignments"/>
131 <param argument="--translated-query-coverage-threshold" type="float" value="90" min="0" max="100"
132 label="Query coverage threshold for translated alignments"/>
133 </section>
134 </xml>
135 <!-- Shell prologue for a history-supplied protein database: builds a DIAMOND database from the FASTA file under protein_db/ --> <token name="@TRANSLATED_SEARCH_PREPARE@"><![CDATA[
136 #if $wf.translated_search.protein_db.selector == 'history'
137 mkdir protein_db
138 &&
139 diamond makedb
140 --threads "\${GALAXY_SLOTS:-4}"
141 --db 'protein_db/protein-db-201901b'
142 --in '$wf.translated_search.protein_db.protein_database'
143 &&
144 #end if
145 ]]></token>
146 <!-- humann arguments for the translated search: DIAMOND aligner, protein database and search mode, e-value, and thresholds; the identity threshold is only passed when the user supplied a value --> <token name="@TRANSLATED_SEARCH_RUN@"><![CDATA[
147 --translated-alignment 'diamond'
148 #if $wf.translated_search.protein_db.selector == 'history'
149 --protein-database protein_db
150 --search-mode '$wf.translated_search.protein_db.search_mode'
151 #else
152 --protein-database '$wf.translated_search.protein_db.protein_database.fields.path'
153 #if 'uniref50' in $wf.translated_search.protein_db.protein_database.fields.value
154 --search-mode 'uniref50'
155 #else
156 --search-mode 'uniref90'
157 #end if
158 #end if
159 --evalue $wf.translated_search.evalue
160 #if str($wf.translated_search.translated_identity_threshold) != ''
161 --translated-identity-threshold $wf.translated_search.translated_identity_threshold
162 #end if
163 --translated-subject-coverage-threshold $wf.translated_search.translated_subject_coverage_threshold
164 --translated-query-coverage-threshold $wf.translated_search.translated_query_coverage_threshold
165 ]]></token>
166 </macros>
167 <expand macro="edam_ontology"/>
168 <expand macro="requirements"/>
169 <expand macro="version"/>
170 <!-- Command template: derives the HUMAnN input format from the dataset extension, runs the database preparation tokens of the selected workflow, then calls humann. An unsupported extension sets a placeholder format so the template still renders, prints a message to stderr and exits with status 1. --> <command detect_errors="exit_code"><![CDATA[
171 #import re
172 #if $in.input.ext.startswith("fasta")
173 #set ext="fasta"
174 #else if $in.input.ext.startswith("fastq")
175 #set ext="fastq"
176 #else if $in.input.ext.endswith("bam")
177 #set ext="bam"
178 #else if $in.input.ext == 'sam'
179 #set ext="sam"
180 #else if $in.input.ext == 'biom1'
181 #set ext="biom"
182 #else
183 #set ext="unsupported"
184 >&2 echo "unknown extension $in.input.ext" && exit 1;
185 #end if
186 #if $in.input.ext.endswith(".gz")
187 #set ext+=".gz"
188 #end if
189
190 #if $wf.selector == 'bypass_prescreen'
191 @NUCLEOTIDE_SEARCH_PREPARE@
192 @TRANSLATED_SEARCH_PREPARE@
193 #else if $wf.selector == 'bypass_taxonomic_profiling'
194 @NUCLEOTIDE_SEARCH_PREPARE@
195 @TRANSLATED_SEARCH_PREPARE@
196 #else if $wf.selector == 'bypass_nucleotide_index'
197 @NUCLEOTIDE_SEARCH_PREPARE@
198 @TRANSLATED_SEARCH_PREPARE@
199 #else if $wf.selector == 'bypass_nucleotide_search'
200 @TRANSLATED_SEARCH_PREPARE@
201 #else if $wf.selector == 'bypass_translated_search'
202 @PRESCREEN_PREPARE@
203 @NUCLEOTIDE_SEARCH_PREPARE@
204 #else if $wf.selector == 'none'
205 @PRESCREEN_PREPARE@
206 @NUCLEOTIDE_SEARCH_PREPARE@
207 @TRANSLATED_SEARCH_PREPARE@
208 #end if
209
210 humann
211 --input '$in.input'
212 --input-format $ext
213 -o 'output'
214 #if $wf.selector == 'bypass_prescreen'
215 --bypass-prescreen
216 @NUCLEOTIDE_SEARCH_RUN@
217 @TRANSLATED_SEARCH_RUN@
218 #else if $wf.selector == 'bypass_taxonomic_profiling'
219 --taxonomic-profile '$wf.taxonomic_profile'
220 @NUCLEOTIDE_SEARCH_RUN@
221 @TRANSLATED_SEARCH_RUN@
222 #else if $wf.selector == 'bypass_nucleotide_index'
223 --bypass-nucleotide-index
224 @NUCLEOTIDE_SEARCH_RUN@
225 @TRANSLATED_SEARCH_RUN@
226 #else if $wf.selector == 'bypass_nucleotide_search'
227 --bypass-nucleotide-search
228 @TRANSLATED_SEARCH_RUN@
229 #else if $wf.selector == 'bypass_translated_search'
230 --bypass-translated-search
231 @PRESCREEN_RUN@
232 @NUCLEOTIDE_SEARCH_RUN@
233 #else if $wf.selector == 'none'
234 @PRESCREEN_RUN@
235 @NUCLEOTIDE_SEARCH_RUN@
236 @TRANSLATED_SEARCH_RUN@
237 #end if
238 --gap-fill '$g_p_quant.gap_fill'
239 --minpath '$g_p_quant.minpath'
240 --pathways '$g_p_quant.pathways'
241 --xipe '$g_p_quant.xipe'
242 --annotation-gene-index $g_p_quant.annotation_gene_index
243 #if $g_p_quant.id_mapping
244 --id-mapping '$g_p_quant.id_mapping'
245 #end if
246 --log-level 'DEBUG'
247 --o-log '$log'
248 --output-basename '$out.output_basename'
249 --output-format '$out.output_format'
250 --output-max-decimals $out.output_max_decimals
251 $out.remove_column_description_output
252 $out.remove_stratified_output
253 --threads "\${GALAXY_SLOTS:-4}"
254 --memory-use minimum
255 ]]></command>
256 <inputs>
257 <conditional name="in"> <!-- Input dataset; the accepted datatypes depend on the kind of input selected -->
258 <param name="selector" type="select" label="Input(s)">
259 <option selected="true" value="raw">Quality-controlled shotgun sequencing reads (metagenome (DNA reads) or metatranscriptome (RNA reads))</option>
260 <option value="mapping">Pre-computed mappings of reads to database sequences</option>
261 <option value="abundance">Pre-computed (typically gene) abundance tables</option>
262 </param>
263 <when value="raw">
264 <param name="input" type="data" format="fastq,fastq.gz,fasta,fasta.gz"
265 help="Paired-end Fasta/FastQ files should be merged first"
266 label="Quality-controlled shotgun sequencing reads (metagenome (DNA reads) or metatranscriptome (RNA reads))"/>
267 </when>
268 <when value="mapping">
269 <param name="input" format="sam,bam" type="data" label="Pre-computed mappings of reads to database sequences"/>
270 </when>
271 <when value="abundance"> <!-- NOTE(review): the command's extension check has no branch for tabular or tsv, so those datasets end in its "unknown extension" branch -->
272 <param name="input" format="tabular,tsv,biom1" type="data" label="Pre-computed (typically gene) abundance tables"/>
273 </when>
274 </conditional>
275 <conditional name="wf"> <!-- Workflow selection: each branch expands only the macros for the steps that still run; the command dispatches on the same selector values -->
276 <param name="selector" type="select" label="Steps">
277 <option value="bypass_prescreen">Bypass the prescreen step and run on the full ChocoPhlAn database (--bypass-prescreen)</option>
278 <option value="bypass_taxonomic_profiling">Bypass the taxonomic profiling step and create a custom ChocoPhlAn database of the species provided afterwards (--taxonomic-profile)</option>
279 <option value="bypass_nucleotide_index">Start the workflow with the nucleotide alignment step using the provided indexed database (--bypass-nucleotide-index)</option>
280 <option value="bypass_nucleotide_search">Bypass all of the alignment steps before the translated search (--bypass-nucleotide-search)</option>
281 <option value="bypass_translated_search">Run all of the alignment steps except the translated search (--bypass-translated-search)</option>
282 <option value="none" selected="true">Run the full workflow steps</option>
283 </param>
284 <when value="bypass_prescreen">
285 <expand macro="nucleotide_search"/>
286 <expand macro="translated_search"/>
287 </when>
288 <when value="bypass_taxonomic_profiling">
289 <param argument="--taxonomic-profile" type="data" format="tabular,txt,tsv" label="Taxonomic profile file"/>
290 <expand macro="nucleotide_search"/>
291 <expand macro="translated_search"/>
292 </when>
293 <when value="bypass_nucleotide_index">
294 <expand macro="nucleotide_search"/>
295 <expand macro="translated_search"/>
296 </when>
297 <when value="bypass_nucleotide_search">
298 <expand macro="translated_search"/>
299 </when>
300 <when value="bypass_translated_search">
301 <expand macro="prescreen"/>
302 <expand macro="nucleotide_search"/>
303 </when>
304 <when value="none">
305 <expand macro="prescreen"/>
306 <expand macro="nucleotide_search"/>
307 <expand macro="translated_search"/>
308 </when>
309 </conditional>
310 <section name="g_p_quant" expanded="true" title="Gene and pathway quantification"> <!-- Quantification switches; the boolean values "on" and "off" are passed to humann verbatim by the command -->
311 <param argument="--gap-fill" type="boolean" checked="true" truevalue="on" falsevalue="off" label="Use gap fill computation?"/>
312 <param argument="--minpath" type="boolean" checked="true" truevalue="on" falsevalue="off" label="Use minpath computation?"/>
313 <param argument="--pathways" type="select" label="Database to use for pathway computations">
314 <option selected="true" value="metacyc">MetaCyc</option>
315 <option value="unipathway">UniPathway</option>
316 </param>
317 <param argument="--xipe" type="boolean" checked="false" truevalue="on" falsevalue="off" label="Use xipe computation?"/>
318 <param argument="--annotation-gene-index" type="integer" value="3" label="Index of the gene in the sequence annotation"/>
319 <param argument="--id-mapping" type="data" format="tsv" optional="true" label="id mapping file for alignments"/> <!-- only passed to humann when a dataset is selected -->
320 </section>
321 <section name="out" expanded="true" title="Outputs"> <!-- Output naming and formatting, plus the optional intermediate files tested by the output filters -->
322 <param argument="--output-basename" type="text" value="humann" label="basename">
323 <sanitizer invalid_char=""> <!-- characters outside the whitelist below are dropped -->
324 <valid initial="string.ascii_letters,string.digits">
325 <add value="_"/>
326 <add value="-"/>
327 </valid>
328 </sanitizer>
329 <validator type="empty_field"/>
330 </param>
331 <param argument="--output-format" type="select" label="Format of the output files">
332 <option selected="true" value="tsv">Tabular</option>
333 <option value="biom">BIOM</option>
334 </param>
335 <param argument="--output-max-decimals" type="integer" value="10" label="Number of decimals to output"/>
336 <param argument="--remove-column-description-output" type="boolean" checked="false" truevalue="--remove-column-description-output" falsevalue="" label="Remove description in the output column?"/>
337 <param argument="--remove-stratified-output" type="boolean" checked="false" truevalue="--remove-stratified-output" falsevalue="" label="Remove stratification from output?"/>
338 <param name="intermediate_temp" type="select" multiple="true" label="Intermediate output files">
339 <option value="metaphlan_bowtie2">MetaPhlAn Bowtie2 output</option>
340 <option value="metaphlan_bugs_list">MetaPhlAn bugs list</option>
341 <option value="bowtie2_alignment">Bowtie2 alignment results</option>
342 <option value="bowtie2_reduced_alignment">Bowtie2 reduced alignment results</option>
343 <option value="bowtie2_unaligned">Unaligned reads after Bowtie2</option>
344 <option value="custom_chocophlan_database">Custom ChocoPhlAn database</option>
345 <option value="diamond_aligned">Translated alignment results</option>
346 <option value="diamond_unaligned">Translated alignment unaligned reads</option>
347 </param>
348 </section>
349 </inputs>
350 <outputs> <!-- Three result tables (tsv or biom, chosen by the output format), the log, and one optional dataset per selected intermediate file -->
351 <data name="gene_families_tsv" format="tabular" from_work_dir="output/*_genefamilies.tsv" label="${tool.name} on ${on_string}: Gene families and their abundance">
352 <filter>out['output_format'] == "tsv"</filter>
353 </data>
354 <data name="gene_families_biom" format="biom1" from_work_dir="output/*_genefamilies.biom" label="${tool.name} on ${on_string}: Gene families and their abundance">
355 <filter>out['output_format'] == "biom"</filter>
356 </data>
357 <data name="pathcoverage_tsv" format="tabular" from_work_dir="output/*_pathcoverage.tsv" label="${tool.name} on ${on_string}: Pathways and their coverage">
358 <filter>out['output_format'] == "tsv"</filter>
359 </data>
360 <data name="pathcoverage_biom" format="biom1" from_work_dir="output/*_pathcoverage.biom" label="${tool.name} on ${on_string}: Pathways and their coverage">
361 <filter>out['output_format'] == "biom"</filter>
362 </data>
363 <data name="pathabundance_tsv" format="tabular" from_work_dir="output/*_pathabundance.tsv" label="${tool.name} on ${on_string}: Pathways and their abundance">
364 <filter>out['output_format'] == "tsv"</filter>
365 </data>
366 <data name="pathabundance_biom" format="biom1" from_work_dir="output/*_pathabundance.biom" label="${tool.name} on ${on_string}: Pathways and their abundance">
367 <filter>out['output_format'] == "biom"</filter>
368 </data>
369 <data name="log" format="txt" label="${tool.name} on ${on_string}: Log"/> <!-- written directly by humann through its log option, hence no from_work_dir -->
370 <data name="metaphlan_bowtie2" format="tabular" from_work_dir="output/*_temp/*_metaphlan_bowtie2.txt" label="${tool.name} on ${on_string}: MetaPhlAn Bowtie2 output">
371 <filter>out['intermediate_temp'] and "metaphlan_bowtie2" in out['intermediate_temp']</filter>
372 </data>
373 <data name="metaphlan_bugs_list" format="tabular" from_work_dir="output/*_temp/*_metaphlan_bugs_list.tsv" label="${tool.name} on ${on_string}: MetaPhlAn bugs list">
374 <filter>out['intermediate_temp'] and "metaphlan_bugs_list" in out['intermediate_temp']</filter>
375 </data>
376 <data name="bowtie2_alignment" format="sam" from_work_dir="output/*_temp/*_bowtie2_aligned.sam" label="${tool.name} on ${on_string}: Bowtie2 alignment results">
377 <filter>out['intermediate_temp'] and "bowtie2_alignment" in out['intermediate_temp']</filter>
378 </data>
379 <data name="bowtie2_reduced_alignment" format="tabular" from_work_dir="output/*_temp/*_bowtie2_aligned.tsv" label="${tool.name} on ${on_string}: Bowtie2 reduced alignment results">
380 <filter>out['intermediate_temp'] and "bowtie2_reduced_alignment" in out['intermediate_temp']</filter>
381 </data>
382 <data name="bowtie2_unaligned" format="fasta" from_work_dir="output/*_temp/*_bowtie2_unaligned.fa" label="${tool.name} on ${on_string}: Unaligned reads after Bowtie2">
383 <filter>out['intermediate_temp'] and "bowtie2_unaligned" in out['intermediate_temp']</filter>
384 </data>
385 <data name="custom_chocophlan_database" format="fasta" from_work_dir="output/*_temp/*_custom_chocophlan_database.ffn" label="${tool.name} on ${on_string}: Custom ChocoPhlAn database">
386 <filter>out['intermediate_temp'] and "custom_chocophlan_database" in out['intermediate_temp']</filter>
387 </data>
388 <data name="diamond_aligned" format="tabular" from_work_dir="output/*_temp/*_diamond_aligned.tsv" label="${tool.name} on ${on_string}: Translated alignment results">
389 <filter>out['intermediate_temp'] and "diamond_aligned" in out['intermediate_temp']</filter>
390 </data>
391 <data name="diamond_unaligned" format="fasta" from_work_dir="output/*_temp/*_diamond_unaligned.fa" label="${tool.name} on ${on_string}: Translated alignment unaligned reads">
392 <filter>out['intermediate_temp'] and "diamond_unaligned" in out['intermediate_temp']</filter>
393 </data>
394 </outputs>
395 <tests>
396 <!-- This test refers to the Metaphlan v3 DB format, and hence should fail, which is to be caught gracefully. -->
397 <test expect_exit_code="42" expect_failure="true"> <!-- the job must fail with exit code 42; presumably raised by customizemetadata.py for the old-structure metadata (verify against that script) -->
398 <conditional name="in">
399 <!-- raw gzipped FASTQ file -->
400 <param name="selector" value="raw"/>
401 <param name="input" value="demo.fastq.gz"/>
402 </conditional>
403 <conditional name="wf">
404 <!-- full workflow -->
405 <param name="selector" value="none"/>
406 <section name="prescreen">
407 <conditional name="metaphlan_db">
408 <param name="selector" value="history"/>
409 <param name="bowtie2db" value="test-db/metaphlan-db/demo-db-v30.fasta"/>
410 <param name="mpa_pkl" value="test-db/metaphlan-db/old-structure/demo-db-v30.json"/>
411 </conditional>
412 <param name="prescreen_threshold" value="0.01"/>
413 </section>
414 <section name="nucleotide_search">
415 <conditional name="nucleotide_db">
416 <param name="selector" value="history"/>
417 <param name="nucleotide_database">
418 <collection type="list">
419 <element name="g__Bacteroides.s__Bacteroides_stercoris.centroids" ftype="fasta.gz" value="test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v201901_v31.ffn.gz" />
420 <element name="g__Bacteroides.s__Bacteroides_vulgatus.centroids" ftype="fasta.gz" value="test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v201901_v31.ffn.gz" />
421 </collection>
422 </param>
423 </conditional>
424 </section>
425 </conditional>
426 </test>
427 <!-- This test refers to the Metaphlan v4 DB format, and hence should be a straight pass. -->
428 <test expect_num_outputs="12"> <!-- 3 tabular results + log + all 8 intermediate files -->
429 <conditional name="in">
430 <!-- raw gzipped FASTQ file -->
431 <param name="selector" value="raw"/>
432 <param name="input" value="demo.fastq.gz"/>
433 </conditional>
434 <conditional name="wf">
435 <!-- full workflow -->
436 <param name="selector" value="none"/>
437 <section name="prescreen">
438 <conditional name="metaphlan_db">
439 <param name="selector" value="history"/>
440 <param name="bowtie2db" value="test-db/metaphlan-db/demo-db-v30.fasta"/>
441 <param name="mpa_pkl" value="test-db/metaphlan-db/demo-db-v30.json"/>
442 </conditional>
443 <param name="prescreen_threshold" value="0.01"/>
444 </section>
445 <section name="nucleotide_search">
446 <conditional name="nucleotide_db">
447 <param name="selector" value="history"/>
448 <param name="nucleotide_database">
449 <collection type="list">
450 <element name="g__Bacteroides.s__Bacteroides_stercoris.centroids" ftype="fasta.gz" value="test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v201901_v31.ffn.gz"/>
451 <element name="g__Bacteroides.s__Bacteroides_vulgatus.centroids" ftype="fasta.gz" value="test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v201901_v31.ffn.gz"/>
452 </collection>
453 </param>
454 </conditional>
455 <param name="nucleotide_identity_threshold" value="0"/>
456 <param name="nucleotide_subject_coverage_threshold" value="50"/>
457 <param name="nucleotide_query_coverage_threshold" value="90"/>
458 </section>
459 <section name="translated_search">
460 <conditional name="protein_db">
461 <param name="selector" value="history"/>
462 <param name="protein_database" value="test-db/protein-db/uniref90_demo_prots_v201901b.fasta"/>
463 <param name="search_mode" value="uniref90"/>
464 </conditional>
465 <param name="evalue" value="1"/>
466 <param name="translated_subject_coverage_threshold" value="50"/>
467 <param name="translated_query_coverage_threshold" value="90"/>
468 </section>
469 </conditional>
470 <section name="g_p_quant">
471 <param name="gap_fill" value="true"/>
472 <param name="minpath" value="true"/>
473 <param name="pathways" value="metacyc"/>
474 <param name="xipe" value="false"/>
475 <param name="annotation_gene_index" value="3"/>
476 </section>
477 <section name="out">
478 <!-- intermediate files -->
479 <param name="output_basename" value="humann"/>
480 <!-- the tool has no log_level input: the command hard-codes the DEBUG log level -->
481 <param name="output_format" value="tsv"/>
482 <param name="output_max_decimals" value="10"/>
483 <param name="remove_column_description_output" value="false"/>
484 <param name="remove_stratified_output" value="false"/>
485 <param name="intermediate_temp"
486 value="metaphlan_bowtie2,metaphlan_bugs_list,bowtie2_alignment,bowtie2_reduced_alignment,bowtie2_unaligned,custom_chocophlan_database,diamond_aligned,diamond_unaligned"/>
487 </section>
488 <output name="gene_families_tsv" ftype="tabular" value="demo_genefamilies.tsv" compare="sim_size">
489 <assert_contents>
490 <has_text text="humann_Abundance-RPKs"/>
491 <has_text text="UniRef90_A0A078RDY6|g__Bacteroides.s__Bacteroides_vulgatus"/>
492 <has_n_columns n="2"/>
493 </assert_contents>
494 </output>
495 <output name="pathcoverage_tsv" ftype="tabular" value="demo_pathcoverage.tsv" compare="sim_size">
496 <assert_contents>
497 <has_text text="humann_Coverage"/>
498 <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
499 <has_n_columns n="2"/>
500 </assert_contents>
501 </output>
502 <output name="pathabundance_tsv" ftype="tabular" value="demo_pathabundance.tsv" compare="sim_size">
503 <assert_contents>
504 <has_text text="humann_Abundance"/>
505 <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
506 <has_n_columns n="2"/>
507 </assert_contents>
508 </output>
509 <output name="log" ftype="txt">
510 <assert_contents>
511 <has_text text="DATABASE SETTINGS"/>
512 <has_text text="humann.utilities"/>
513 <has_text text="humann_genefamilies"/>
514 <has_text text="humann_pathabundance"/>
515 <has_text text="humann_pathcoverage"/>
516 <has_text text="g__Bacteroides.s__Bacteroides_dorei"/>
517 </assert_contents>
518 </output>
519 <output name="metaphlan_bowtie2" ftype="tabular">
520 <assert_contents>
521 <has_text text="s__Bacteroides_dorei_read000116"/>
522 <has_text text="357276__I9R1V6__DXD47_04125"/>
523 <has_text text="s__Bacteroides_dorei_read000129"/>
524 <has_text text="357276__B6W1Y5__IY41_11405"/>
525 </assert_contents>
526 </output>
527 <output name="metaphlan_bugs_list" ftype="tabular">
528 <assert_contents>
529 <has_text text="relative_abundance"/>
530 <has_text text="k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_vulgatus"/>
531 <has_text text="k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_dorei"/>
532 </assert_contents>
533 </output>
534 <output name="bowtie2_alignment" ftype="sam">
535 <assert_contents>
536 <has_text text="SN:821__F3PQ30__HMPREF9446_00822|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_F3PQ30|UniRef50_F3PQ30|510"/>
537 <has_text text="s__Bacteroides_dorei_read009840"/>
538 <has_text text="PN:bowtie2"/>
539 <has_text text="LN:1281"/>
540 </assert_contents>
541 </output>
542 <output name="bowtie2_reduced_alignment" ftype="tabular">
543 <assert_contents>
544 <has_text text="s__Bacteroides_dorei_read000001"/>
545 <has_text text="821__A6L5K0__BVU_3338|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_A6L5K0|UniRef50_A6L5K0|468"/>
546 <has_text text="s__Bacteroides_vulgatus_read003845"/>
547 <has_text text="821__A0A396BBC3__DXC03_14350|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_A0A396BBC3|UniRef50_A0A174FNA3|2934"/>
548 </assert_contents>
549 </output>
550 <output name="bowtie2_unaligned" ftype="fasta">
551 <assert_contents>
552 <has_text text=">s__Bacteroides_dorei_read000001|100"/>
553 <has_text text=">s__Bacteroides_dorei_read000002|100"/>
554 <has_text text=">unclassified_read000971|100"/>
555 <has_text text=">s__Bacteroides_vulgatus_read004473|100"/>
556 </assert_contents>
557 </output>
558 <output name="custom_chocophlan_database" ftype="fasta">
559 <assert_contents>
560 <has_text text=">821__F3PQ30__HMPREF9446_00822|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_F3PQ30|UniRef50_F3PQ30|510"/>
561 <has_text text=">821__F3PUY1__HMPREF9446_02555|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_F3PUY1|UniRef50_A0A3E5DX68|411"/>
562 <has_text text=">821__A0A3E4KCH0__DXD33_19495|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_A0A3E4KCH0|UniRef50_F3PP72|3582"/>
563 </assert_contents>
564 </output>
565 <output name="diamond_aligned" ftype="tabular">
566 <assert_contents>
567 <has_text text="UniRef90_Z5XVM9|969"/>
568 <has_text text="s__Bacteroides_vulgatus_read"/>
569 <has_text text="s__Bacteroides_vulgatus_read"/>
570 <has_text text="UniRef90_Y0KEF3|618"/>
571 </assert_contents>
572 </output>
573 <output name="diamond_unaligned" ftype="fasta">
574 <assert_contents>
575 <has_text text=">s__Bacteroides_dorei_read000001|100"/>
576 <has_text text=">s__Bacteroides_vulgatus_read006412|100"/>
577 <has_text text=">unclassified_read000867|100"/>
578 </assert_contents>
579 </output>
580 </test>
581 <test expect_num_outputs="4"> <!-- 3 BIOM results + log; no intermediate files requested -->
582 <conditional name="in">
583 <!-- raw gzipped FASTA file -->
584 <param name="selector" value="raw"/>
585 <param name="input" value="demo.fasta.gz"/>
586 </conditional>
587 <conditional name="wf">
588 <!-- bypass_prescreen -->
589 <param name="selector" value="bypass_prescreen"/>
590 <section name="nucleotide_search">
591 <conditional name="nucleotide_db">
592 <param name="selector" value="cached"/>
593 <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/>
594 </conditional>
595 <param name="nucleotide_identity_threshold" value="0"/>
596 <param name="nucleotide_subject_coverage_threshold" value="50"/>
597 <param name="nucleotide_query_coverage_threshold" value="90"/>
598 </section>
599 <section name="translated_search">
600 <conditional name="protein_db">
601 <param name="selector" value="cached"/>
602 <param name="protein_database" value="uniref-DEMO_diamond-20210421"/>
603 </conditional>
604 <param name="evalue" value="1"/>
605 <param name="translated_subject_coverage_threshold" value="50"/>
606 <param name="translated_query_coverage_threshold" value="90"/>
607 </section>
608 </conditional>
609 <section name="g_p_quant">
610 <param name="gap_fill" value="true"/>
611 <param name="minpath" value="true"/>
612 <param name="pathways" value="metacyc"/>
613 <param name="xipe" value="false"/>
614 <param name="annotation_gene_index" value="3"/>
615 </section>
616 <section name="out">
617 <!-- Biom -->
618 <param name="output_basename" value="humann"/>
619 <!-- the tool has no log_level input: the command hard-codes the DEBUG log level -->
620 <param name="output_format" value="biom"/>
621 <param name="output_max_decimals" value="10"/>
622 <param name="remove_column_description_output" value="false"/>
623 <param name="remove_stratified_output" value="false"/>
624 <param name="intermediate_temp" value=""/>
625 </section>
626 <output name="gene_families_biom" ftype="biom1">
627 <assert_contents>
628 <has_text text="biom-format"/>
629 <has_text text="UniRef90_A0A396BPQ7|g__Bacteroides.s__Bacteroides_vulgatus"/>
630 <has_text text="UniRef90_W8YTG4|unclassified"/>
631 </assert_contents>
632 </output>
633 <output name="pathcoverage_biom" ftype="biom1">
634 <assert_contents>
635 <has_text text="TREE"/>
636 <has_text text="format-url"/>
637 <has_text text="biom-format"/>
638 <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
639 <has_text text="humann_Coverage"/>
640 </assert_contents>
641 </output>
642 <output name="pathabundance_biom" ftype="biom1">
643 <assert_contents>
644 <has_text text="TREE"/>
645 <has_text text="format-url"/>
646 <has_text text="biom-format"/>
647 <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
648 <has_text text="humann_Abundance"/>
649 </assert_contents>
650 </output>
651 <output name="log" ftype="txt">
652 <assert_contents>
653 <has_text text="Running bowtie2-build ........"/>
654 <has_text text="Total bugs from nucleotide alignment: 2"/>
655 <has_text text="Total gene families from nucleotide alignment: "/>
656 <has_text text="Aligning to reference database: "/>
657 <has_text text="Total gene families after translated alignment: "/>
658 </assert_contents>
659 </output>
660 </test>
661 <test expect_num_outputs="4">
662 <conditional name="in">
663 <param name="selector" value="raw"/>
664 <param name="input" value="demo.fasta.gz"/>
665 </conditional>
666 <conditional name="wf">
667 <!-- bypass_taxonomic_profiling workflow: the taxonomic profile is supplied through the taxonomic_profile param (demo-taxonomic-profile.tabular) -->
668 <param name="selector" value="bypass_taxonomic_profiling"/>
669 <param name="taxonomic_profile" value="demo-taxonomic-profile.tabular"/>
670 <section name="nucleotide_search">
671 <conditional name="nucleotide_db">
672 <param name="selector" value="cached"/>
673 <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/>
674 </conditional>
675 <param name="nucleotide_identity_threshold" value="0"/>
676 <param name="nucleotide_subject_coverage_threshold" value="50"/>
677 <param name="nucleotide_query_coverage_threshold" value="90"/>
678 </section>
679 <section name="translated_search">
680 <conditional name="protein_db">
681 <param name="selector" value="cached"/>
682 <param name="protein_database" value="uniref-DEMO_diamond-20210421"/>
683 </conditional>
684 <param name="evalue" value="1"/>
685 <param name="translated_subject_coverage_threshold" value="50"/>
686 <param name="translated_query_coverage_threshold" value="90"/>
687 </section>
688 </conditional>
689 <section name="g_p_quant">
690 <param name="gap_fill" value="true"/>
691 <param name="minpath" value="true"/>
692 <param name="pathways" value="metacyc"/>
693 <param name="xipe" value="false"/>
694 <param name="annotation_gene_index" value="3"/>
695 </section>
696 <section name="out">
697 <param name="output_basename" value="humann"/>
698 <param name="log_level" value="DEBUG"/>
699 <param name="output_format" value="tsv"/>
700 <param name="output_max_decimals" value="10"/>
701 <param name="remove_column_description_output" value="false"/>
702 <param name="remove_statified_output" value="false"/>
703 <param name="intermediate_temp" value=""/>
704 </section>
705 <output name="gene_families_tsv" ftype="tabular">
706 <assert_contents>
707 <has_text text="humann_Abundance-RPKs"/>
708 <has_text text="UniRef90_G1UL42|g__Bacteroides.s__Bacteroides_dorei"/>
709 <has_text text="UniRef90_A0A078RDY6|g__Bacteroides.s__Bacteroides_vulgatus"/>
710 <has_n_columns n="2"/>
711 </assert_contents>
712 </output>
713 <output name="pathcoverage_tsv" ftype="tabular">
714 <assert_contents>
715 <has_text text="humann_Coverage"/>
716 <has_text text="UNINTEGRATED|unclassified"/>
717 <has_text text="PWY-4203: volatile benzenoid biosynthesis I (ester formation)|unclassified"/>
718 <has_n_columns n="2"/>
719 </assert_contents>
720 </output>
721 <output name="pathabundance_tsv" ftype="tabular">
722 <assert_contents>
723 <has_text text="humann_Abundance"/>
724 <has_text text="UNINTEGRATED|unclassified"/>
725 <has_text text="PWY-4203: volatile benzenoid biosynthesis I (ester formation)|unclassified"/>
726
727 <has_n_columns n="2"/>
728 </assert_contents>
729 </output>
730 <output name="log" ftype="txt">
731 <assert_contents>
732 <has_text text="Found g__Bacteroides.s__Bacteroides_vulgatus : "/>
733 <has_text text="Total species selected from prescreen: 2"/>
734 <has_text text="Total bugs from nucleotide alignment: 2"/>
735 <has_text text="g__Bacteroides.s__Bacteroides_vulgatus: "/>
736 <has_text text="g__Bacteroides.s__Bacteroides_dorei: "/>
737 <has_text text="Total gene families from nucleotide alignment: "/>
738 <has_text text="Total bugs after translated alignment: 3"/>
739 <has_text text="Total gene families after translated alignment"/>
740 </assert_contents>
741 </output>
742 </test>
743 <test expect_num_outputs="4">
744 <conditional name="in">
745 <!-- pre-computed mapping supplied as a SAM file (demo.sam) -->
746 <param name="selector" value="mapping"/>
747 <param name="input" value="demo.sam"/>
748 </conditional>
749 <conditional name="wf">
750 <!-- bypass_nucleotide_index workflow: nucleotide_search and translated_search sections both use cached demo databases -->
751 <param name="selector" value="bypass_nucleotide_index"/>
752 <section name="nucleotide_search">
753 <conditional name="nucleotide_db">
754 <param name="selector" value="cached"/>
755 <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/>
756 </conditional>
757 <param name="nucleotide_identity_threshold" value="0"/>
758 <param name="nucleotide_subject_coverage_threshold" value="50"/>
759 <param name="nucleotide_query_coverage_threshold" value="90"/>
760 </section>
761 <section name="translated_search">
762 <conditional name="protein_db">
763 <param name="selector" value="cached"/>
764 <param name="protein_database" value="uniref-DEMO_diamond-20210421"/>
765 </conditional>
766 <param name="evalue" value="1"/>
767 <param name="translated_subject_coverage_threshold" value="50"/>
768 <param name="translated_query_coverage_threshold" value="90"/>
769 </section>
770 </conditional>
771 <section name="g_p_quant">
772 <param name="gap_fill" value="true"/>
773 <param name="minpath" value="true"/>
774 <param name="pathways" value="metacyc"/>
775 <param name="xipe" value="false"/>
776 <param name="annotation_gene_index" value="3"/>
777 </section>
778 <section name="out">
779 <param name="output_basename" value="humann"/>
780 <param name="log_level" value="DEBUG"/>
781 <param name="output_format" value="tsv"/>
782 <param name="output_max_decimals" value="10"/>
783 <param name="remove_column_description_output" value="false"/>
784 <param name="remove_statified_output" value="false"/>
785 <param name="intermediate_temp" value=""/>
786 </section>
787 <output name="gene_families_tsv" ftype="tabular">
788 <assert_contents>
789 <has_text text="UniRef90_R6HHA8|g__Bacteroides.s__Bacteroides_dorei"/>
790 <has_text text="UniRef90_unknown|g__Bacteroides.s__Bacteroides_vulgatus"/>
791 <has_n_columns n="2"/>
792 </assert_contents>
793 </output>
794 <output name="pathcoverage_tsv" ftype="tabular">
795 <assert_contents>
796 <has_text text="UNMAPPED"/>
797 <has_text text="UNINTEGRATED"/>
798 <has_n_columns n="2"/>
799 </assert_contents>
800 </output>
801 <output name="pathabundance_tsv" ftype="tabular">
802 <assert_contents>
803 <has_text text="UNMAPPED"/>
804 <has_text text="UNINTEGRATED"/>
805 <has_n_columns n="2"/>
806 </assert_contents>
807 </output>
808 <output name="log" ftype="txt">
809 <assert_contents>
810 <has_text text="Process the sam mapping results"/>
811 <has_text text="Computing gene families"/>
812 <has_text text="Computing pathways abundance and coverage"/>
813 </assert_contents>
814 </output>
815 </test>
816 <test expect_num_outputs="4">
817 <conditional name="in">
818 <!-- raw input: FASTQ file (demo.fastq.gz) -->
819 <param name="selector" value="raw"/>
820 <param name="input" value="demo.fastq.gz"/>
821 </conditional>
822 <conditional name="wf">
823 <!-- bypass_nucleotide_search workflow: only the translated_search section is configured -->
824 <param name="selector" value="bypass_nucleotide_search"/>
825 <section name="translated_search">
826 <conditional name="protein_db">
827 <param name="selector" value="cached"/>
828 <param name="protein_database" value="uniref-DEMO_diamond-20210421"/>
829 </conditional>
830 <param name="evalue" value="1"/>
831 <param name="translated_subject_coverage_threshold" value="50"/>
832 <param name="translated_query_coverage_threshold" value="90"/>
833 </section>
834 </conditional>
835 <section name="g_p_quant">
836 <param name="gap_fill" value="true"/>
837 <param name="minpath" value="true"/>
838 <param name="pathways" value="metacyc"/>
839 <param name="xipe" value="false"/>
840 <param name="annotation_gene_index" value="3"/>
841 </section>
842 <section name="out">
843 <param name="output_basename" value="humann"/>
844 <param name="log_level" value="DEBUG"/>
845 <param name="output_format" value="tsv"/>
846 <param name="output_max_decimals" value="10"/>
847 <param name="remove_column_description_output" value="false"/>
848 <param name="remove_statified_output" value="false"/>
849 <param name="intermediate_temp" value=""/>
850 </section>
851 <output name="gene_families_tsv" ftype="tabular">
852 <assert_contents>
853 <has_text text="humann_Abundance-RPKs"/>
854 <has_text text="UniRef90_Q9ZUH4|unclassified"/>
855 <has_n_columns n="2"/>
856 </assert_contents>
857 </output>
858 <output name="pathcoverage_tsv" ftype="tabular">
859 <assert_contents>
860 <has_text text="humann_Coverage"/>
861 <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
862 <has_n_columns n="2"/>
863 </assert_contents>
864 </output>
865 <output name="pathabundance_tsv" ftype="tabular">
866 <assert_contents>
867 <has_text text="humann_Abundance"/>
868 <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/>
869 <has_n_columns n="2"/>
870 </assert_contents>
871 </output>
872 <output name="log" ftype="txt">
873 <assert_contents>
874 <has_text text="Total bugs after translated alignment: 1"/>
875 <has_text text="unclassified: "/>
876 <has_text text="Unaligned reads after translated alignment: "/>
877 <has_text text="Total gene families"/>
878 </assert_contents>
879 </output>
880 </test>
881 <test expect_num_outputs="4">
882 <conditional name="in">
883 <!-- raw input: FASTQ file (demo.fastq.gz) -->
884 <param name="selector" value="raw"/>
885 <param name="input" value="demo.fastq.gz"/>
886 </conditional>
887 <conditional name="wf">
888 <!-- bypass_translated_search workflow: prescreen and nucleotide_search sections are configured; the custom output_basename (newname) is checked by the has_text assertions on the outputs -->
889 <param name="selector" value="bypass_translated_search"/>
890 <section name="prescreen">
891 <conditional name="metaphlan_db">
892 <param name="selector" value="cached"/>
893 <param name="cached_db" value="metaphlan-demo-db-20210421"/>
894 </conditional>
895 <param name="prescreen_threshold" value="0.01"/>
896 </section>
897 <section name="nucleotide_search">
898 <conditional name="nucleotide_db">
899 <param name="selector" value="cached"/>
900 <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/>
901 </conditional>
902 <param name="nucleotide_identity_threshold" value="0"/>
903 <param name="nucleotide_subject_coverage_threshold" value="50"/>
904 <param name="nucleotide_query_coverage_threshold" value="90"/>
905 </section>
906 </conditional>
907 <section name="g_p_quant">
908 <param name="gap_fill" value="true"/>
909 <param name="minpath" value="true"/>
910 <param name="pathways" value="metacyc"/>
911 <param name="xipe" value="false"/>
912 <param name="annotation_gene_index" value="3"/>
913 </section>
914 <section name="out">
915 <param name="output_basename" value="newname"/>
916 <param name="log_level" value="DEBUG"/>
917 <param name="output_format" value="tsv"/>
918 <param name="output_max_decimals" value="10"/>
919 <param name="remove_column_description_output" value="false"/>
920 <param name="remove_statified_output" value="false"/>
921 <param name="intermediate_temp" value=""/>
922 </section>
923 <output name="gene_families_tsv" ftype="tabular">
924 <assert_contents>
925 <has_text text="newname_Abundance-RPKs"/>
926 <has_text text="UniRef90_G1UL42|g__Bacteroides.s__Bacteroides_dorei"/>
927 <has_n_columns n="2"/>
928 </assert_contents>
929 </output>
930 <output name="pathcoverage_tsv" ftype="tabular">
931 <assert_contents>
932 <has_text text="newname_Coverage"/>
933 <has_text text="UNMAPPED"/>
934 <has_text text="UNINTEGRATED"/>
935 <has_n_columns n="2"/>
936 </assert_contents>
937 </output>
938 <output name="pathabundance_tsv" ftype="tabular">
939 <assert_contents>
940 <has_text text="newname_Abundance"/>
941 <has_text text="UNMAPPED"/>
942 <has_text text="UNINTEGRATED"/>
943 <has_n_columns n="2"/>
944 </assert_contents>
945 </output>
946 <output name="log" ftype="txt">
947 <assert_contents>
948 <has_text text="Total bugs from nucleotide alignment: 2"/>
949 <has_text text="g__Bacteroides.s__Bacteroides_vulgatus: 1195 hits"/>
950 <has_text text="g__Bacteroides.s__Bacteroides_dorei: 1260 hits"/>
951 <has_text text="Total gene families from nucleotide alignment: 545"/>
952 <has_text text="Bypass translated search"/>
953 </assert_contents>
954 </output>
955 </test>
956 <!-- This test is expected to fail (exit code 1) because MetaPhlAn v4 is invoked on an outdated input DB. -->
957 <test expect_exit_code="1" expect_failure="true">
958 <conditional name="in">
959 <!-- raw input: FASTQ file (demo.fastq.gz) -->
960 <param name="selector" value="raw"/>
961 <param name="input" value="demo.fastq.gz"/>
962 </conditional>
963 <conditional name="wf">
964 <!-- bypass_translated_search workflow, with the cached MetaPhlAn DB set to metaphlan-db-old-structure -->
965 <param name="selector" value="bypass_translated_search"/>
966 <section name="prescreen">
967 <conditional name="metaphlan_db">
968 <param name="selector" value="cached"/>
969 <param name="cached_db" value="metaphlan-db-old-structure"/>
970 </conditional>
971 </section>
972 <section name="nucleotide_search">
973 <conditional name="nucleotide_db">
974 <param name="selector" value="cached"/>
975 <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/>
976 </conditional>
977 </section>
978 </conditional>
979 </test>
980 </tests>
981 <help><![CDATA[
982 @HELP_HEADER@
983
984 This tool corresponds to the main tool in the HUMAnN pipeline:
985
986
987 1. Taxonomic prescreen
988
989 Reads are mapped (with MetaPhlAn) to clade-specific marker genes to rapidly identify community species
990
991 2. Pangenome search (nucleotide search)
992
993 Reads are mapped (with Bowtie2) to pangenomes of identified species
994
995 3. Translated search
996
997 Unclassified reads are aligned to a comprehensive and non-redundant protein database
998
999 4. Gene family and pathway quantification
1000
1001 - Gene abundance estimation
1002
1003 Mapping results are processed to estimate per-species and community total gene family abundance, weighting by
1004
1005 - alignment quality
1006 - gene length
1007 - gene coverage
1008
1009 - Per-species and community-level metabolic network reconstruction
1010
1011 Genes are mapped to metabolic reactions to identify a parsimonious set of pathways that explains each species' observed reactions
1012
1013 Pathway abundance and coverage are quantified by:
1014
1015 1. optimizing over alternative subpathways
1016 2. imputing abundance for conspicuously depleted reactions
1017
1018
1019 Inputs
1020 ======
1021
1022 HUMAnN can start from a few different types of input data, each in a few different formats:
1023
1024 - Quality-controlled shotgun sequencing reads
1025
1026 This is the most common starting point: a metagenome (DNA reads) or metatranscriptome (RNA reads)
1027
1028 - Pre-computed mappings of reads to database sequences
1029
1030 - Pre-computed (typically gene) abundance tables
1031
1032
1033 HUMAnN uses 3 reference databases.
1034 Locally cached databases have to be downloaded before using them (using the dedicated tool). Custom databases can also be used after upload.
1035
1036 Outputs
1037 =======
1038
1039 HUMAnN creates three output files:
1040
1041 - Gene families and their abundance
1042 - Pathways and their abundance
1043 - Pathways and their coverage
1044
1045 Ten intermediate temp output files can also be retrieved.
1046
1047 ]]></help>
1048 <expand macro="citations"/>
1049 </tool>