Mercurial > repos > thanhlv > humann3
comparison humann.xml @ 0:ab86614989fd draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/humann commit 6b06711cfba45855d5a992ed1c73c472eaef644f
author | thanhlv |
---|---|
date | Mon, 13 Feb 2023 16:16:49 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ab86614989fd |
---|---|
1 <tool id="humann3" name="HUMAnN3" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"> | |
2 <description>to profile presence/absence and abundance of microbial pathways and gene families</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 <!-- Macro "prescreen": form section for the prescreen step. It offers a MetaPhlAn marker database (a locally cached data table entry, or a FASTA dataset plus JSON metadata from the history) and the prescreen threshold. --> <xml name="prescreen"> | |
6 <section name="prescreen" title="Prescreen / Identifying community species" expanded="true"> | |
7 <conditional name="metaphlan_db"> | |
8 <param name="selector" type="select" label="Database with clade-specific marker genes"> | |
9 <option value="cached" selected="true">Locally cached</option> | |
10 <option value="history">From history</option> | |
11 </param> | |
12 <when value="cached"> | |
13 <param name="cached_db" label="Cached database with clade-specific marker genes" type="select"> | |
14 <options from_data_table="metaphlan_database"> | |
15 <validator message="No MetaPhlAn database is available" type="no_options" /> | |
16 </options> | |
17 </param> | |
18 </when> | |
19 <when value="history"> | |
20 <param argument="--bowtie2db" type="data" format="fasta" label="Database with clade-specific marker genes from history"/> | |
21 <param argument="--mpa_pkl" type="data" format="json" label="Metadata associated to the database with clade-specific marker genes from history"/> | |
22 </when> | |
23 </conditional> | |
24 <param argument="--prescreen-threshold" type="float" value="0.01" min="0" max="100" label="Minimum percentage of reads matching a species"/> | |
25 <!-- TODO: add metaphlan options --> | |
26 </section> | |
27 </xml> | |
28 <!-- Token @PRESCREEN_PREPARE@: shell prelude emitted only when the MetaPhlAn database comes from the history. It creates metaphlan_db/, builds a large Bowtie2 index named custom_db-v30 from the FASTA dataset, and converts the JSON metadata to metaphlan_db/custom_db-v30.pkl with customizemetadata.py. The trailing && chains it to whatever command follows. --> <token name="@PRESCREEN_PREPARE@"><![CDATA[ | |
29 #if $wf.prescreen.metaphlan_db.selector == "history" | |
30 mkdir metaphlan_db | |
31 && | |
32 bowtie2-build --large-index '$wf.prescreen.metaphlan_db.bowtie2db' 'metaphlan_db/custom_db-v30' | |
33 && | |
34 python '$__tool_directory__/customizemetadata.py' | |
35 transform_json_to_pkl | |
36 --json '$wf.prescreen.metaphlan_db.mpa_pkl' | |
37 --pkl 'metaphlan_db/custom_db-v30.pkl' | |
38 && | |
39 #end if | |
40 ]]></token> | |
41 <!-- Token @PRESCREEN_RUN@: humann arguments for the prescreen step. It builds the MetaPhlAn option string starting from "-t rel_ab", pointing bowtie2db and index either at the custom index in metaphlan_db/ (history) or at the path and dbkey fields of the cached data table entry, then passes the prescreen threshold. --> <token name="@PRESCREEN_RUN@"><![CDATA[ | |
42 #set $metaphlan_option = "-t rel_ab" | |
43 #if $wf.prescreen.metaphlan_db.selector == "history" | |
44 #set $metaphlan_option += " --bowtie2db metaphlan_db/" | |
45 #set $metaphlan_option += " --index custom_db-v30" | |
46 #else | |
47 #set $metaphlan_option += " --bowtie2db %s" % $wf.prescreen.metaphlan_db.cached_db.fields.path | |
48 #set $metaphlan_option += " --index %s" % $wf.prescreen.metaphlan_db.cached_db.fields.dbkey | |
49 #end if | |
50 --metaphlan-options="$metaphlan_option" | |
51 --prescreen-threshold $wf.prescreen.prescreen_threshold | |
52 ]]></token> | |
53 <!-- Macro "nucleotide_database": history input for the nucleotide database, declared as a list collection of FASTA datasets. --> <xml name="nucleotide_database"> | |
54 <param argument="--nucleotide-database" type="data_collection" collection_type="list" format="fasta" label="Nucleotide database from history" help="Each file must be named: ^[g__].[s__]"/> | |
55 </xml> | |
56 <!-- Macro "nucleotide_search": form section for the nucleotide search. It offers a nucleotide database (a cached data table entry or a collection from the history) and the identity, subject coverage and query coverage thresholds for nucleotide alignments. --> <xml name="nucleotide_search"> | |
57 <section name="nucleotide_search" title="Nucleotide search / Mapping reads to community pangenomes" expanded="true"> | |
58 <conditional name="nucleotide_db"> | |
59 <param name="selector" type="select" label="Nucleotide database" help="For locally cached databases, you need first to execute the tool to download HUMAnN databases"> | |
60 <option value="cached" selected="true">Locally cached</option> | |
61 <option value="history">From history</option> | |
62 </param> | |
63 <when value="cached"> | |
64 <param name="nucleotide_database" type="select" label="Nucleotide database"> | |
65 <options from_data_table="humann_nucleotide_database"> | |
66 <validator message="No nucleotide database is available" type="no_options" /> | |
67 </options> | |
68 </param> | |
69 </when> | |
70 <when value="history"> | |
71 <expand macro="nucleotide_database"/> | |
72 </when> | |
73 </conditional> | |
74 <!-- TODO: add bowtie2 options --> | |
75 <param argument="--nucleotide-identity-threshold" type="float" value="0" min="0" max="100" | |
76 label="Identity threshold for nucleotide alignments"/> | |
77 <param argument="--nucleotide-subject-coverage-threshold" type="float" value="50" min="0" max="100" | |
78 label="Subject coverage threshold for nucleotide alignments"/> | |
79 <param argument="--nucleotide-query-coverage-threshold" type="float" value="90" min="0" max="100" | |
80 label="Query coverage threshold for nucleotide alignments"/> | |
81 </section> | |
82 </xml> | |
83 <!-- Token @NUCLEOTIDE_SEARCH_PREPARE@: when the nucleotide database comes from the history, creates nucleotide_db/ and symlinks every collection element into it. The link name is the element identifier with every character outside the class [\w\-_.] replaced by an underscore, followed by the .v201901_v31 suffix. Uses the re module, which the command template imports. --> <token name="@NUCLEOTIDE_SEARCH_PREPARE@"><![CDATA[ | |
84 #if $wf.nucleotide_search.nucleotide_db.selector == 'history' | |
85 mkdir nucleotide_db | |
86 && | |
87 #for $f in $wf.nucleotide_search.nucleotide_db.nucleotide_database: | |
88 ln -s '$f' 'nucleotide_db/${re.sub('[^\w\-_.]', '_', f.element_identifier)}.v201901_v31' && | |
89 #end for | |
90 #end if | |
91 ]]></token> | |
92 <!-- Token @NUCLEOTIDE_SEARCH_RUN@: humann arguments for the nucleotide search. It points the nucleotide database at the nucleotide_db directory (history) or at the path field of the cached data table entry, then passes the identity, subject coverage and query coverage thresholds. --> <token name="@NUCLEOTIDE_SEARCH_RUN@"><![CDATA[ | |
93 #if $wf.nucleotide_search.nucleotide_db.selector == 'history' | |
94 --nucleotide-database nucleotide_db | |
95 #else | |
96 --nucleotide-database '$wf.nucleotide_search.nucleotide_db.nucleotide_database.fields.path' | |
97 #end if | |
98 --nucleotide-identity-threshold $wf.nucleotide_search.nucleotide_identity_threshold | |
99 --nucleotide-subject-coverage-threshold $wf.nucleotide_search.nucleotide_subject_coverage_threshold | |
100 --nucleotide-query-coverage-threshold $wf.nucleotide_search.nucleotide_query_coverage_threshold | |
101 ]]></token> | |
102 <!-- Macro "translated_search": form section for the translated search. It offers a protein database (a cached data table entry, or a FASTA dataset from the history together with the UniRef search mode), the e-value, and the identity, subject coverage and query coverage thresholds for translated alignments. The identity threshold is optional. --> <xml name="translated_search"> | |
103 <section name="translated_search" title="Translated search / Aligning unmapped reads to a protein database" expanded="true"> | |
104 <conditional name="protein_db"> | |
105 <param name="selector" type="select" label="Protein database" help="For locally cached databases, you need first to execute the tool to download HUMAnN databases"> | |
106 <option value="cached" selected="true">Locally cached</option> | |
107 <option value="history">From history</option> | |
108 </param> | |
109 <when value="cached"> | |
110 <param name="protein_database" type="select" label="Protein database"> | |
111 <options from_data_table="humann_protein_database"> | |
112 <validator message="No protein database is available" type="no_options" /> | |
113 </options> | |
114 </param> | |
115 </when> | |
116 <when value="history"> | |
117 <param argument="--protein-database" type="data" format="fasta" label="Protein database from history"/> | |
118 <param argument="--search-mode" type="select" label="Search mode"> | |
119 <option value="uniref50">UniRef50</option> | |
120 <option value="uniref90" selected="true">UniRef90</option> | |
121 </param> | |
122 </when> | |
123 </conditional> | |
124 <param argument="--evalue" type="float" value="1" label="E-value threshold to use with the translated search"/> | |
125 <!-- TODO: add diamond options --> | |
126 <param argument="--translated-identity-threshold" type="float" min="0" max="100" optional="true" | |
127 label="Identity threshold for translated alignments" | |
128 help="It is tuned automatically (based on uniref mode) unless a custom value is specified"/> | |
129 <param argument="--translated-subject-coverage-threshold" type="float" value="50" min="0" max="100" | |
130 label="Subject coverage threshold for translated alignments"/> | |
131 <param argument="--translated-query-coverage-threshold" type="float" value="90" min="0" max="100" | |
132 label="Query coverage threshold for translated alignments"/> | |
133 </section> | |
134 </xml> | |
135 <!-- Token @TRANSLATED_SEARCH_PREPARE@: when the protein database comes from the history, creates protein_db/ and runs diamond makedb on the FASTA dataset to produce protein_db/protein-db-201901b, using GALAXY_SLOTS threads (4 when unset). The trailing && chains it to the following command. --> <token name="@TRANSLATED_SEARCH_PREPARE@"><![CDATA[ | |
136 #if $wf.translated_search.protein_db.selector == 'history' | |
137 mkdir protein_db | |
138 && | |
139 diamond makedb | |
140 --in '$wf.translated_search.protein_db.protein_database' | |
141 --db 'protein_db/protein-db-201901b' | |
142 --threads "\${GALAXY_SLOTS:-4}" | |
143 && | |
144 #end if | |
145 ]]></token> | |
146 <!-- Token @TRANSLATED_SEARCH_RUN@: humann arguments for the translated search with diamond. It selects the protein database and search mode (taken from the form for history databases, or derived from whether the cached entry value contains "uniref50"), passes the e-value and coverage thresholds, and passes the translated identity threshold only when the optional field is filled in. --> <token name="@TRANSLATED_SEARCH_RUN@"><![CDATA[ | |
147 --translated-alignment 'diamond' | |
148 #if $wf.translated_search.protein_db.selector == 'history' | |
149 --protein-database protein_db | |
150 --search-mode '$wf.translated_search.protein_db.search_mode' | |
151 #else | |
152 --protein-database '$wf.translated_search.protein_db.protein_database.fields.path' | |
153 #if 'uniref50' in $wf.translated_search.protein_db.protein_database.fields.value | |
154 --search-mode 'uniref50' | |
155 #else | |
156 --search-mode 'uniref90' | |
157 #end if | |
158 #end if | |
159 --evalue $wf.translated_search.evalue | |
160 #if str($wf.translated_search.translated_identity_threshold) != '' | |
161 --translated-identity-threshold $wf.translated_search.translated_identity_threshold | |
162 #end if | |
163 --translated-subject-coverage-threshold $wf.translated_search.translated_subject_coverage_threshold | |
164 --translated-query-coverage-threshold $wf.translated_search.translated_query_coverage_threshold | |
165 ]]></token> | |
166 </macros> | |
167 <expand macro="edam_ontology"/> | |
168 <expand macro="requirements"/> | |
169 <expand macro="version"/> | |
170 <!-- Command template: derives the humann input format from the input dataset extension (appending .gz for compressed inputs), emits the database preparation snippets needed by the selected workflow, then calls humann with the matching bypass flag and search options, the quantification settings and the output options. An unsupported extension prints an error on stderr and exits with status 1 before humann is started. NOTE(review): tabular/tsv abundance tables are accepted by the input form but have no format mapping here, so they reach the unknown-extension branch; the humann "genetable" input format may be the intended mapping, to be confirmed. --> <command detect_errors="exit_code"><![CDATA[ | |
171 #import re | |
172 #if $in.input.ext.startswith("fasta") | |
173 #set ext="fasta" | |
174 #else if $in.input.ext.startswith("fastq") | |
175 #set ext="fastq" | |
176 #else if $in.input.ext.endswith("bam") | |
177 #set ext="bam" | |
178 #else if $in.input.ext == 'sam' | |
179 #set ext="sam" | |
180 #else if $in.input.ext == 'biom1' | |
181 #set ext="biom" | |
182 #else | |
183 #set ext=str($in.input.ext) | |
184 echo "unknown extension $in.input.ext" >&2 && exit 1; | |
185 #end if | |
186 #if $in.input.ext.endswith(".gz") | |
187 #set ext+=".gz" | |
188 #end if | |
189 | |
190 #if $wf.selector == 'bypass_prescreen' | |
191 @NUCLEOTIDE_SEARCH_PREPARE@ | |
192 @TRANSLATED_SEARCH_PREPARE@ | |
193 #else if $wf.selector == 'bypass_taxonomic_profiling' | |
194 @NUCLEOTIDE_SEARCH_PREPARE@ | |
195 @TRANSLATED_SEARCH_PREPARE@ | |
196 #else if $wf.selector == 'bypass_nucleotide_index' | |
197 @NUCLEOTIDE_SEARCH_PREPARE@ | |
198 @TRANSLATED_SEARCH_PREPARE@ | |
199 #else if $wf.selector == 'bypass_nucleotide_search' | |
200 @TRANSLATED_SEARCH_PREPARE@ | |
201 #else if $wf.selector == 'bypass_translated_search' | |
202 @PRESCREEN_PREPARE@ | |
203 @NUCLEOTIDE_SEARCH_PREPARE@ | |
204 #else if $wf.selector == 'none' | |
205 @PRESCREEN_PREPARE@ | |
206 @NUCLEOTIDE_SEARCH_PREPARE@ | |
207 @TRANSLATED_SEARCH_PREPARE@ | |
208 #end if | |
209 | |
210 humann | |
211 --input '$in.input' | |
212 --input-format $ext | |
213 -o 'output' | |
214 #if $wf.selector == 'bypass_prescreen' | |
215 --bypass-prescreen | |
216 @NUCLEOTIDE_SEARCH_RUN@ | |
217 @TRANSLATED_SEARCH_RUN@ | |
218 #else if $wf.selector == 'bypass_taxonomic_profiling' | |
219 --taxonomic-profile '$wf.taxonomic_profile' | |
220 @NUCLEOTIDE_SEARCH_RUN@ | |
221 @TRANSLATED_SEARCH_RUN@ | |
222 #else if $wf.selector == 'bypass_nucleotide_index' | |
223 --bypass-nucleotide-index | |
224 @NUCLEOTIDE_SEARCH_RUN@ | |
225 @TRANSLATED_SEARCH_RUN@ | |
226 #else if $wf.selector == 'bypass_nucleotide_search' | |
227 --bypass-nucleotide-search | |
228 @TRANSLATED_SEARCH_RUN@ | |
229 #else if $wf.selector == 'bypass_translated_search' | |
230 --bypass-translated-search | |
231 @PRESCREEN_RUN@ | |
232 @NUCLEOTIDE_SEARCH_RUN@ | |
233 #else if $wf.selector == 'none' | |
234 @PRESCREEN_RUN@ | |
235 @NUCLEOTIDE_SEARCH_RUN@ | |
236 @TRANSLATED_SEARCH_RUN@ | |
237 #end if | |
238 --gap-fill '$g_p_quant.gap_fill' | |
239 --minpath '$g_p_quant.minpath' | |
240 --pathways '$g_p_quant.pathways' | |
241 --xipe '$g_p_quant.xipe' | |
242 --annotation-gene-index $g_p_quant.annotation_gene_index | |
243 #if $g_p_quant.id_mapping | |
244 --id-mapping '$g_p_quant.id_mapping' | |
245 #end if | |
246 --log-level 'DEBUG' | |
247 --o-log '$log' | |
248 --output-basename '$out.output_basename' | |
249 --output-format '$out.output_format' | |
250 --output-max-decimals $out.output_max_decimals | |
251 $out.remove_column_description_output | |
252 $out.remove_stratified_output | |
253 --threads "\${GALAXY_SLOTS:-4}" | |
254 --memory-use minimum | |
255 ]]></command> | |
256 <inputs> | |
257 <!-- Input selection: the chosen input kind (raw reads, pre-computed mappings or abundance tables) only changes which dataset formats the "input" parameter accepts. --> <conditional name="in"> | |
258 <param name="selector" type="select" label="Input(s)"> | |
259 <option value="raw" selected="true">Quality-controlled shotgun sequencing reads (metagenome (DNA reads) or metatranscriptome (RNA reads))</option> | |
260 <option value="mapping">Pre-computed mappings of reads to database sequences</option> | |
261 <option value="abundance">Pre-computed (typically gene) abundance tables</option> | |
262 </param> | |
263 <when value="raw"> | |
264 <param name="input" type="data" format="fastq,fastq.gz,fasta,fasta.gz" | |
265 label="Quality-controlled shotgun sequencing reads (metagenome (DNA reads) or metatranscriptome (RNA reads))" | |
266 help="Paired-end Fasta/FastQ files should be merged first"/> | |
267 </when> | |
268 <when value="mapping"> | |
269 <param name="input" type="data" format="sam,bam" label="Pre-computed mappings of reads to database sequences"/> | |
270 </when> | |
271 <when value="abundance"> | |
272 <param name="input" type="data" format="tabular,tsv,biom1" label="Pre-computed (typically gene) abundance tables"/> | |
273 </when> | |
274 </conditional> | |
275 <!-- Workflow selection: each option decides which humann steps run, and therefore which of the prescreen, nucleotide_search and translated_search form sections are shown. The command template branches on the same selector values. --> <conditional name="wf"> | |
276 <param name="selector" type="select" label="Steps"> | |
277 <option value="bypass_prescreen">Bypass the prescreen step and run on the full ChocoPhlAn database (--bypass-prescreen)</option> | |
278 <option value="bypass_taxonomic_profiling">Bypass the taxonomic profiling step and creates a custom ChocoPhlAn database of the species provided afterwards</option> | |
279 <option value="bypass_nucleotide_index">Starts the workflow with the nucleotide alignment step using the provided indexed database (--bypass-nucleotide-index)</option> | |
280 <option value="bypass_nucleotide_search">Bypass all of the alignment steps before the translated search (--bypass-nucleotide-search)</option> | |
281 <option value="bypass_translated_search">Run all of the alignment steps except the translated search (--bypass-translated-search)</option> | |
282 <option value="none" selected="true">Run the full workflow steps</option> | |
283 </param> | |
284 <when value="bypass_prescreen"> | |
285 <expand macro="nucleotide_search"/> | |
286 <expand macro="translated_search"/> | |
287 </when> | |
288 <when value="bypass_taxonomic_profiling"> | |
289 <param argument="--taxonomic-profile" type="data" format="tabular,txt,tsv" label="Taxonomic profile file"/> | |
290 <expand macro="nucleotide_search"/> | |
291 <expand macro="translated_search"/> | |
292 </when> | |
293 <when value="bypass_nucleotide_index"> | |
294 <expand macro="nucleotide_search"/> | |
295 <expand macro="translated_search"/> | |
296 </when> | |
297 <when value="bypass_nucleotide_search"> | |
298 <expand macro="translated_search"/> | |
299 </when> | |
300 <when value="bypass_translated_search"> | |
301 <expand macro="prescreen"/> | |
302 <expand macro="nucleotide_search"/> | |
303 </when> | |
304 <when value="none"> | |
305 <expand macro="prescreen"/> | |
306 <expand macro="nucleotide_search"/> | |
307 <expand macro="translated_search"/> | |
308 </when> | |
309 </conditional> | |
310 <!-- Gene and pathway quantification options. The boolean parameters render as the literal values "on" and "off", which the command template passes straight to humann. The id mapping dataset is optional. --> <section name="g_p_quant" title="Gene and pathway quantification" expanded="true"> | |
311 <param argument="--gap-fill" type="boolean" truevalue="on" falsevalue="off" checked="true" label="Use gap fill computation?"/> | |
312 <param argument="--minpath" type="boolean" truevalue="on" falsevalue="off" checked="true" label="Use minpath computation?"/> | |
313 <param argument="--pathways" type="select" label="Database to use for pathway computations"> | |
314 <option value="metacyc" selected="true">MetaCyc</option> | |
315 <option value="unipathway">UniPathway</option> | |
316 </param> | |
317 <param argument="--xipe" type="boolean" truevalue="on" falsevalue="off" checked="false" label="Use xipe computation?"/> | |
318 <param argument="--annotation-gene-index" type="integer" value="3" label="Index of the gene in the sequence annotation"/> | |
319 <param argument="--id-mapping" type="data" format="tsv" optional="true" label="id mapping file for alignments"/> | |
320 </section> | |
321 <!-- Output options: the output basename (sanitized to ASCII letters, digits, underscore and hyphen, and required to be non-empty), the output format, the number of decimals, two flags that render as the corresponding humann switch when checked, and the list of intermediate files to expose as datasets. --> <section name="out" title="Outputs" expanded="true"> | |
322 <param argument="--output-basename" type="text" value="humann" label="basename"> | |
323 <sanitizer invalid_char=""> | |
324 <valid initial="string.ascii_letters,string.digits"> | |
325 <add value="_" /> | |
326 <add value="-" /> | |
327 </valid> | |
328 </sanitizer> | |
329 <validator type="empty_field" /> | |
330 </param> | |
331 <param argument="--output-format" type="select" label="Format of the output files"> | |
332 <option value="tsv" selected="true">Tabular</option> | |
333 <option value="biom">BIOM</option> | |
334 </param> | |
335 <param argument="--output-max-decimals" type="integer" value="10" label="Number of decimals to output"/> | |
336 <param argument="--remove-column-description-output" type="boolean" truevalue="--remove-column-description-output" falsevalue="" checked="false" label="Remove description in the output column?"/> | |
337 <param argument="--remove-stratified-output" type="boolean" truevalue="--remove-stratified-output" falsevalue="" checked="false" label="Remove stratification from output?"/> | |
338 <param name="intermediate_temp" type="select" multiple="true" label="Intermediate output files"> | |
339 <option value="metaphlan_bowtie2">MetaPhlAn Bowtie2 output</option> | |
340 <option value="metaphlan_bugs_list">MetaPhlAn bugs list</option> | |
341 <option value="bowtie2_alignment">Bowtie2 alignment results</option> | |
342 <option value="bowtie2_reduced_alignment">Bowtie2 reduced alignment results</option> | |
343 <option value="bowtie2_unaligned">Unaligned reads after Bowtie2</option> | |
344 <option value="custom_chocophlan_database">Custom ChocoPhlAn database</option> | |
345 <option value="diamond_aligned">Translated alignment results</option> | |
346 <option value="diamond_unaligned">Translated alignment unaligned reads</option> | |
347 </param> | |
348 </section> | |
349 </inputs> | |
350 <!-- Outputs: the gene family, pathway coverage and pathway abundance tables are exposed either as tabular or as BIOM datasets depending on the selected output format. The log dataset has no filter. Each intermediate file from the humann temp directory is exposed only when it is selected in the intermediate_temp parameter. --> <outputs> | |
351 <data name="gene_families_tsv" format="tabular" from_work_dir="output/*_genefamilies.tsv" label="${tool.name} on ${on_string}: Gene families and their abundance" > | |
352 <filter>out['output_format'] == "tsv"</filter> | |
353 </data> | |
354 <data name="gene_families_biom" format="biom1" from_work_dir="output/*_genefamilies.biom" label="${tool.name} on ${on_string}: Gene families and their abundance" > | |
355 <filter>out['output_format'] == "biom"</filter> | |
356 </data> | |
357 <data name="pathcoverage_tsv" format="tabular" from_work_dir="output/*_pathcoverage.tsv" label="${tool.name} on ${on_string}: Pathways and their coverage" > | |
358 <filter>out['output_format'] == "tsv"</filter> | |
359 </data> | |
360 <data name="pathcoverage_biom" format="biom1" from_work_dir="output/*_pathcoverage.biom" label="${tool.name} on ${on_string}: Pathways and their coverage" > | |
361 <filter>out['output_format'] == "biom"</filter> | |
362 </data> | |
363 <data name="pathabundance_tsv" format="tabular" from_work_dir="output/*_pathabundance.tsv" label="${tool.name} on ${on_string}: Pathways and their abundance" > | |
364 <filter>out['output_format'] == "tsv"</filter> | |
365 </data> | |
366 <data name="pathabundance_biom" format="biom1" from_work_dir="output/*_pathabundance.biom" label="${tool.name} on ${on_string}: Pathways and their abundance" > | |
367 <filter>out['output_format'] == "biom"</filter> | |
368 </data> | |
369 <data format="txt" name="log" label="${tool.name} on ${on_string}: Log"/> | |
370 <data format="tabular" name="metaphlan_bowtie2" from_work_dir="output/*_temp/*_metaphlan_bowtie2.txt" label="${tool.name} on ${on_string}: MetaPhlAn Bowtie2 output" > | |
371 <filter>out['intermediate_temp'] and "metaphlan_bowtie2" in out['intermediate_temp']</filter> | |
372 </data> | |
373 <data format="tabular" name="metaphlan_bugs_list" from_work_dir="output/*_temp/*_metaphlan_bugs_list.tsv" label="${tool.name} on ${on_string}: MetaPhlAn bugs list" > | |
374 <filter>out['intermediate_temp'] and "metaphlan_bugs_list" in out['intermediate_temp']</filter> | |
375 </data> | |
376 <data format="sam" name="bowtie2_alignment" from_work_dir="output/*_temp/*_bowtie2_aligned.sam" label="${tool.name} on ${on_string}: Bowtie2 alignment results" > | |
377 <filter>out['intermediate_temp'] and "bowtie2_alignment" in out['intermediate_temp']</filter> | |
378 </data> | |
379 <data format="tabular" name="bowtie2_reduced_alignment" from_work_dir="output/*_temp/*_bowtie2_aligned.tsv" label="${tool.name} on ${on_string}: Bowtie2 reduced alignment results" > | |
380 <filter>out['intermediate_temp'] and "bowtie2_reduced_alignment" in out['intermediate_temp']</filter> | |
381 </data> | |
382 <data format="fasta" name="bowtie2_unaligned" from_work_dir="output/*_temp/*_bowtie2_unaligned.fa" label="${tool.name} on ${on_string}: Unaligned reads after Bowtie2" > | |
383 <filter>out['intermediate_temp'] and "bowtie2_unaligned" in out['intermediate_temp']</filter> | |
384 </data> | |
385 <data format="fasta" name="custom_chocophlan_database" from_work_dir="output/*_temp/*_custom_chocophlan_database.ffn" label="${tool.name} on ${on_string}: Custom ChocoPhlAn database" > | |
386 <filter>out['intermediate_temp'] and "custom_chocophlan_database" in out['intermediate_temp']</filter> | |
387 </data> | |
388 <data format="tabular" name="diamond_aligned" from_work_dir="output/*_temp/*_diamond_aligned.tsv" label="${tool.name} on ${on_string}: Translated alignment results" > | |
389 <filter>out['intermediate_temp'] and "diamond_aligned" in out['intermediate_temp']</filter> | |
390 </data> | |
391 <data format="fasta" name="diamond_unaligned" from_work_dir="output/*_temp/*_diamond_unaligned.fa" label="${tool.name} on ${on_string}: Translated alignment unaligned reads" > | |
392 <filter>out['intermediate_temp'] and "diamond_unaligned" in out['intermediate_temp']</filter> | |
393 </data> | |
394 </outputs> | |
395 <tests> | |
396 <!-- This test uses MetaPhlAn metadata in the v3 database format (old-structure JSON) and is therefore expected to fail; the failure must be reported gracefully with exit code 42. --> | |
397 <test expect_exit_code="42" expect_failure="true"> | |
398 <conditional name="in"> | |
399 <!-- raw fastq file --> | |
400 <param name="selector" value="raw"/> | |
401 <param name="input" value="demo.fastq.gz"/> | |
402 </conditional> | |
403 <conditional name="wf"> | |
404 <!-- full workflow --> | |
405 <param name="selector" value="none"/> | |
406 <section name="prescreen"> | |
407 <conditional name="metaphlan_db"> | |
408 <param name="selector" value="history"/> | |
409 <param name="bowtie2db" value="test-db/metaphlan-db/demo-db-v30.fasta"/> | |
410 <param name="mpa_pkl" value="test-db/metaphlan-db/old-structure/demo-db-v30.json"/> | |
411 </conditional> | |
412 <param name="prescreen_threshold" value="0.01"/> | |
413 </section> | |
414 <section name="nucleotide_search"> | |
415 <conditional name="nucleotide_db"> | |
416 <param name="selector" value="history"/> | |
417 <param name="nucleotide_database"> | |
418 <collection type="list"> | |
419 <element name="g__Bacteroides.s__Bacteroides_stercoris.centroids" ftype="fasta.gz" value="test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v201901_v31.ffn.gz" /> | |
420 <element name="g__Bacteroides.s__Bacteroides_vulgatus.centroids" ftype="fasta.gz" value="test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v201901_v31.ffn.gz" /> | |
421 </collection> | |
422 </param> | |
423 </conditional> | |
424 </section> | |
425 </conditional> | |
426 </test> | |
427 <!-- This test uses MetaPhlAn metadata in the v4 database format and should pass: it runs the full workflow with all databases taken from the history, requests every intermediate file and checks the 12 resulting outputs. --> | |
428 <test expect_num_outputs="12"> | |
429 <conditional name="in"> | |
430 <!-- raw fastq file --> | |
431 <param name="selector" value="raw"/> | |
432 <param name="input" value="demo.fastq.gz"/> | |
433 </conditional> | |
434 <conditional name="wf"> | |
435 <!-- full workflow --> | |
436 <param name="selector" value="none"/> | |
437 <section name="prescreen"> | |
438 <conditional name="metaphlan_db"> | |
439 <param name="selector" value="history"/> | |
440 <param name="bowtie2db" value="test-db/metaphlan-db/demo-db-v30.fasta"/> | |
441 <param name="mpa_pkl" value="test-db/metaphlan-db/demo-db-v30.json"/> | |
442 </conditional> | |
443 <param name="prescreen_threshold" value="0.01"/> | |
444 </section> | |
445 <section name="nucleotide_search"> | |
446 <conditional name="nucleotide_db"> | |
447 <param name="selector" value="history"/> | |
448 <param name="nucleotide_database"> | |
449 <collection type="list"> | |
450 <element name="g__Bacteroides.s__Bacteroides_stercoris.centroids" ftype="fasta.gz" value="test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_dorei.centroids.v201901_v31.ffn.gz"/> | |
451 <element name="g__Bacteroides.s__Bacteroides_vulgatus.centroids" ftype="fasta.gz" value="test-db/nucleotide-db/g__Bacteroides.s__Bacteroides_vulgatus.centroids.v201901_v31.ffn.gz"/> | |
452 </collection> | |
453 </param> | |
454 </conditional> | |
455 <param name="nucleotide_identity_threshold" value="0"/> | |
456 <param name="nucleotide_subject_coverage_threshold" value="50"/> | |
457 <param name="nucleotide_query_coverage_threshold" value="90"/> | |
458 </section> | |
459 <section name="translated_search"> | |
460 <conditional name="protein_db"> | |
461 <param name="selector" value="history"/> | |
462 <param name="protein_database" value="test-db/protein-db/uniref90_demo_prots_v201901b.fasta"/> | |
463 <param name="search_mode" value="uniref90"/> | |
464 </conditional> | |
465 <param name="evalue" value="1"/> | |
466 <param name="translated_subject_coverage_threshold" value="50"/> | |
467 <param name="translated_query_coverage_threshold" value="90"/> | |
468 </section> | |
469 </conditional> | |
470 <section name="g_p_quant"> | |
471 <param name="gap_fill" value="true"/> | |
472 <param name="minpath" value="true"/> | |
473 <param name="pathways" value="metacyc"/> | |
474 <param name="xipe" value="false"/> | |
475 <param name="annotation_gene_index" value="3"/> | |
476 </section> | |
477 <section name="out"> | |
478 <!-- intermediate files --> | |
479 <param name="output_basename" value="humann"/> | |
481 <param name="output_format" value="tsv"/> | |
482 <param name="output_max_decimals" value="10"/> | |
483 <param name="remove_column_description_output" value="false"/> | |
484 <param name="remove_stratified_output" value="false"/> | |
485 <param name="intermediate_temp" | |
486 value="metaphlan_bowtie2,metaphlan_bugs_list,bowtie2_alignment,bowtie2_reduced_alignment,bowtie2_unaligned,custom_chocophlan_database,diamond_aligned,diamond_unaligned"/> | |
487 </section> | |
488 <output name="gene_families_tsv" ftype="tabular" value="demo_genefamilies.tsv" compare="sim_size"> | |
489 <assert_contents> | |
490 <has_text text="humann_Abundance-RPKs"/> | |
491 <has_text text="UniRef90_A0A078RDY6|g__Bacteroides.s__Bacteroides_vulgatus"/> | |
492 <has_n_columns n="2"/> | |
493 </assert_contents> | |
494 </output> | |
495 <output name="pathcoverage_tsv" ftype="tabular" value="demo_pathcoverage.tsv" compare="sim_size"> | |
496 <assert_contents> | |
497 <has_text text="humann_Coverage"/> | |
498 <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/> | |
499 <has_n_columns n="2"/> | |
500 </assert_contents> | |
501 </output> | |
502 <output name="pathabundance_tsv" ftype="tabular" value="demo_pathabundance.tsv" compare="sim_size"> | |
503 <assert_contents> | |
504 <has_text text="humann_Abundance"/> | |
505 <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/> | |
506 <has_n_columns n="2"/> | |
507 </assert_contents> | |
508 </output> | |
509 <output name="log" ftype="txt"> | |
510 <assert_contents> | |
511 <has_text text="DATABASE SETTINGS"/> | |
512 <has_text text="humann.utilities"/> | |
513 <has_text text="humann_genefamilies"/> | |
514 <has_text text="humann_pathabundance"/> | |
515 <has_text text="humann_pathcoverage"/> | |
516 <has_text text="g__Bacteroides.s__Bacteroides_dorei"/> | |
517 </assert_contents> | |
518 </output> | |
519 <output name="metaphlan_bowtie2" ftype="tabular"> | |
520 <assert_contents> | |
521 <has_text text="s__Bacteroides_dorei_read000116"/> | |
522 <has_text text="357276__I9R1V6__DXD47_04125"/> | |
523 <has_text text="s__Bacteroides_dorei_read000129"/> | |
524 <has_text text="357276__B6W1Y5__IY41_11405"/> | |
525 </assert_contents> | |
526 </output> | |
527 <output name="metaphlan_bugs_list" ftype="tabular"> | |
528 <assert_contents> | |
529 <has_text text="relative_abundance"/> | |
530 <has_text text="k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_vulgatus"/> | |
531 <has_text text="k__Bacteria|p__Bacteroidetes|c__Bacteroidia|o__Bacteroidales|f__Bacteroidaceae|g__Bacteroides|s__Bacteroides_dorei"/> | |
532 </assert_contents> | |
533 </output> | |
534 <output name="bowtie2_alignment" ftype="sam"> | |
535 <assert_contents> | |
536 <has_text text="SN:821__F3PQ30__HMPREF9446_00822|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_F3PQ30|UniRef50_F3PQ30|510"/> | |
537 <has_text text="s__Bacteroides_dorei_read009840"/> | |
538 <has_text text="PN:bowtie2"/> | |
539 <has_text text="LN:1281"/> | |
540 </assert_contents> | |
541 </output> | |
542 <output name="bowtie2_reduced_alignment" ftype="tabular"> | |
543 <assert_contents> | |
544 <has_text text="s__Bacteroides_dorei_read000001"/> | |
545 <has_text text="821__A6L5K0__BVU_3338|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_A6L5K0|UniRef50_A6L5K0|468"/> | |
546 <has_text text="s__Bacteroides_vulgatus_read003845"/> | |
547 <has_text text="821__A0A396BBC3__DXC03_14350|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_A0A396BBC3|UniRef50_A0A174FNA3|2934"/> | |
548 </assert_contents> | |
549 </output> | |
550 <output name="bowtie2_unaligned" ftype="fasta"> | |
551 <assert_contents> | |
552 <has_text text=">s__Bacteroides_dorei_read000001|100"/> | |
553 <has_text text=">s__Bacteroides_dorei_read000002|100"/> | |
554 <has_text text=">unclassified_read000971|100"/> | |
555 <has_text text=">s__Bacteroides_vulgatus_read004473|100"/> | |
556 </assert_contents> | |
557 </output> | |
558 <output name="custom_chocophlan_database" ftype="fasta"> | |
559 <assert_contents> | |
560 <has_text text=">821__F3PQ30__HMPREF9446_00822|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_F3PQ30|UniRef50_F3PQ30|510"/> | |
561 <has_text text=">821__F3PUY1__HMPREF9446_02555|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_F3PUY1|UniRef50_A0A3E5DX68|411"/> | |
562 <has_text text=">821__A0A3E4KCH0__DXD33_19495|k__Bacteria.p__Bacteroidetes.c__Bacteroidia.o__Bacteroidales.f__Bacteroidaceae.g__Bacteroides.s__Bacteroides_vulgatus|UniRef90_A0A3E4KCH0|UniRef50_F3PP72|3582"/> | |
563 </assert_contents> | |
564 </output> | |
565 <output name="diamond_aligned" ftype="tabular"> | |
566 <assert_contents> | |
567 <has_text text="UniRef90_Z5XVM9|969"/> | |
568 <has_text text="s__Bacteroides_vulgatus_read"/> | |
569 <has_text text="s__Bacteroides_vulgatus_read"/> | |
570 <has_text text="UniRef90_Y0KEF3|618"/> | |
571 </assert_contents> | |
572 </output> | |
573 <output name="diamond_unaligned" ftype="fasta"> | |
574 <assert_contents> | |
575 <has_text text=">s__Bacteroides_dorei_read000001|100"/> | |
576 <has_text text=">s__Bacteroides_vulgatus_read006412|100"/> | |
577 <has_text text=">unclassified_read000867|100"/> | |
578 </assert_contents> | |
579 </output> | |
580 </test> | |
581 <test expect_num_outputs="4"> | |
582 <conditional name="in"> | |
583 <!-- raw reads: FASTA input (demo.fasta.gz) --> | |
584 <param name="selector" value="raw"/> | |
585 <param name="input" value="demo.fasta.gz"/> | |
586 </conditional> | |
587 <conditional name="wf"> | |
588 <!-- workflow variant bypass_prescreen: only the nucleotide and translated search sections are configured, both with cached DEMO databases --> | |
589 <param name="selector" value="bypass_prescreen"/> | |
590 <section name="nucleotide_search"> | |
591 <conditional name="nucleotide_db"> | |
592 <param name="selector" value="cached"/> | |
593 <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/> | |
594 </conditional> | |
595 <param name="nucleotide_identity_threshold" value="0"/> | |
596 <param name="nucleotide_subject_coverage_threshold" value="50"/> | |
597 <param name="nucleotide_query_coverage_threshold" value="90"/> | |
598 </section> | |
599 <section name="translated_search"> | |
600 <conditional name="protein_db"> | |
601 <param name="selector" value="cached"/> | |
602 <param name="protein_database" value="uniref-DEMO_diamond-20210421"/> | |
603 </conditional> | |
604 <param name="evalue" value="1"/> | |
605 <param name="translated_subject_coverage_threshold" value="50"/> | |
606 <param name="translated_query_coverage_threshold" value="90"/> | |
607 </section> | |
608 </conditional> | |
609 <section name="g_p_quant"> | |
610 <param name="gap_fill" value="true"/> | |
611 <param name="minpath" value="true"/> | |
612 <param name="pathways" value="metacyc"/> | |
613 <param name="xipe" value="false"/> | |
614 <param name="annotation_gene_index" value="3"/> | |
615 </section> | |
616 <section name="out"> | |
617 <!-- output_format is biom here, so the outputs asserted below are the gene_families_biom, pathcoverage_biom and pathabundance_biom datasets plus the log --> | |
618 <param name="output_basename" value="humann"/> | |
619 <param name="log_level" value="DEBUG"/> | |
620 <param name="output_format" value="biom"/> | |
621 <param name="output_max_decimals" value="10"/> | |
622 <param name="remove_column_description_output" value="false"/> | |
623 <param name="remove_statified_output" value="false"/> | |
624 <param name="intermediate_temp" value=""/> | |
625 </section> | |
626 <output name="gene_families_biom" ftype="biom1"> | |
627 <assert_contents> | |
628 <has_text text="biom-format"/> | |
629 <has_text text="UniRef90_A0A396BPQ7|g__Bacteroides.s__Bacteroides_vulgatus"/> | |
630 <has_text text="UniRef90_W8YTG4|unclassified"/> | |
631 </assert_contents> | |
632 </output> | |
633 <output name="pathcoverage_biom" ftype="biom1"> | |
634 <assert_contents> | |
635 <has_text text="TREE"/> | |
636 <has_text text="format-url"/> | |
637 <has_text text="biom-format"/> | |
638 <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/> | |
639 <has_text text="humann_Coverage"/> | |
640 </assert_contents> | |
641 </output> | |
642 <output name="pathabundance_biom" ftype="biom1"> | |
643 <assert_contents> | |
644 <has_text text="TREE"/> | |
645 <has_text text="format-url"/> | |
646 <has_text text="biom-format"/> | |
647 <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/> | |
648 <has_text text="humann_Abundance"/> | |
649 </assert_contents> | |
650 </output> | |
651 <output name="log" ftype="txt"> | |
652 <assert_contents> | |
653 <has_text text="Running bowtie2-build ........"/> | |
654 <has_text text="Total bugs from nucleotide alignment: 2"/> | |
655 <has_text text="Total gene families from nucleotide alignment: "/> | |
656 <has_text text="Aligning to reference database: "/> | |
657 <has_text text="Total gene families after translated alignment: "/> | |
658 </assert_contents> | |
659 </output> | |
660 </test> | |
661 <test expect_num_outputs="4"> | |
662 <conditional name="in"> | |
663 <param name="selector" value="raw"/> | |
664 <param name="input" value="demo.fasta.gz"/> | |
665 </conditional> | |
666 <conditional name="wf"> | |
667 <!-- workflow variant bypass_taxonomic_profiling: a taxonomic profile file (demo-taxonomic-profile.tabular) is passed as a parameter; nucleotide and translated search use cached DEMO databases --> | |
668 <param name="selector" value="bypass_taxonomic_profiling"/> | |
669 <param name="taxonomic_profile" value="demo-taxonomic-profile.tabular"/> | |
670 <section name="nucleotide_search"> | |
671 <conditional name="nucleotide_db"> | |
672 <param name="selector" value="cached"/> | |
673 <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/> | |
674 </conditional> | |
675 <param name="nucleotide_identity_threshold" value="0"/> | |
676 <param name="nucleotide_subject_coverage_threshold" value="50"/> | |
677 <param name="nucleotide_query_coverage_threshold" value="90"/> | |
678 </section> | |
679 <section name="translated_search"> | |
680 <conditional name="protein_db"> | |
681 <param name="selector" value="cached"/> | |
682 <param name="protein_database" value="uniref-DEMO_diamond-20210421"/> | |
683 </conditional> | |
684 <param name="evalue" value="1"/> | |
685 <param name="translated_subject_coverage_threshold" value="50"/> | |
686 <param name="translated_query_coverage_threshold" value="90"/> | |
687 </section> | |
688 </conditional> | |
689 <section name="g_p_quant"> | |
690 <param name="gap_fill" value="true"/> | |
691 <param name="minpath" value="true"/> | |
692 <param name="pathways" value="metacyc"/> | |
693 <param name="xipe" value="false"/> | |
694 <param name="annotation_gene_index" value="3"/> | |
695 </section> | |
696 <section name="out"> | |
697 <param name="output_basename" value="humann"/> | |
698 <param name="log_level" value="DEBUG"/> | |
699 <param name="output_format" value="tsv"/> | |
700 <param name="output_max_decimals" value="10"/> | |
701 <param name="remove_column_description_output" value="false"/> | |
702 <param name="remove_statified_output" value="false"/> | |
703 <param name="intermediate_temp" value=""/> | |
704 </section> | |
705 <output name="gene_families_tsv" ftype="tabular"> | |
706 <assert_contents> | |
707 <has_text text="humann_Abundance-RPKs"/> | |
708 <has_text text="UniRef90_G1UL42|g__Bacteroides.s__Bacteroides_dorei"/> | |
709 <has_text text="UniRef90_A0A078RDY6|g__Bacteroides.s__Bacteroides_vulgatus"/> | |
710 <has_n_columns n="2"/> | |
711 </assert_contents> | |
712 </output> | |
713 <output name="pathcoverage_tsv" ftype="tabular"> | |
714 <assert_contents> | |
715 <has_text text="humann_Coverage"/> | |
716 <has_text text="UNINTEGRATED|unclassified"/> | |
717 <has_text text="PWY-4203: volatile benzenoid biosynthesis I (ester formation)|unclassified"/> | |
718 <has_n_columns n="2"/> | |
719 </assert_contents> | |
720 </output> | |
721 <output name="pathabundance_tsv" ftype="tabular"> | |
722 <assert_contents> | |
723 <has_text text="humann_Abundance"/> | |
724 <has_text text="UNINTEGRATED|unclassified"/> | |
725 <has_text text="PWY-4203: volatile benzenoid biosynthesis I (ester formation)|unclassified"/> | |
726 | |
727 <has_n_columns n="2"/> | |
728 </assert_contents> | |
729 </output> | |
730 <output name="log" ftype="txt"> | |
731 <assert_contents> | |
732 <has_text text="Found g__Bacteroides.s__Bacteroides_vulgatus : "/> | |
733 <has_text text="Total species selected from prescreen: 2"/> | |
734 <has_text text="Total bugs from nucleotide alignment: 2"/> | |
735 <has_text text="g__Bacteroides.s__Bacteroides_vulgatus: "/> | |
736 <has_text text="g__Bacteroides.s__Bacteroides_dorei: "/> | |
737 <has_text text="Total gene families from nucleotide alignment: "/> | |
738 <has_text text="Total bugs after translated alignment: 3"/> | |
739 <has_text text="Total gene families after translated alignment"/> | |
740 </assert_contents> | |
741 </output> | |
742 </test> | |
743 <test expect_num_outputs="4"> | |
744 <conditional name="in"> | |
745 <!-- input is a mapping result in SAM format (demo.sam), selected with selector=mapping --> | |
746 <param name="selector" value="mapping"/> | |
747 <param name="input" value="demo.sam"/> | |
748 </conditional> | |
749 <conditional name="wf"> | |
750 <!-- workflow variant bypass_nucleotide_index: nucleotide and translated search sections are still configured with cached DEMO databases --> | |
751 <param name="selector" value="bypass_nucleotide_index"/> | |
752 <section name="nucleotide_search"> | |
753 <conditional name="nucleotide_db"> | |
754 <param name="selector" value="cached"/> | |
755 <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/> | |
756 </conditional> | |
757 <param name="nucleotide_identity_threshold" value="0"/> | |
758 <param name="nucleotide_subject_coverage_threshold" value="50"/> | |
759 <param name="nucleotide_query_coverage_threshold" value="90"/> | |
760 </section> | |
761 <section name="translated_search"> | |
762 <conditional name="protein_db"> | |
763 <param name="selector" value="cached"/> | |
764 <param name="protein_database" value="uniref-DEMO_diamond-20210421"/> | |
765 </conditional> | |
766 <param name="evalue" value="1"/> | |
767 <param name="translated_subject_coverage_threshold" value="50"/> | |
768 <param name="translated_query_coverage_threshold" value="90"/> | |
769 </section> | |
770 </conditional> | |
771 <section name="g_p_quant"> | |
772 <param name="gap_fill" value="true"/> | |
773 <param name="minpath" value="true"/> | |
774 <param name="pathways" value="metacyc"/> | |
775 <param name="xipe" value="false"/> | |
776 <param name="annotation_gene_index" value="3"/> | |
777 </section> | |
778 <section name="out"> | |
779 <param name="output_basename" value="humann"/> | |
780 <param name="log_level" value="DEBUG"/> | |
781 <param name="output_format" value="tsv"/> | |
782 <param name="output_max_decimals" value="10"/> | |
783 <param name="remove_column_description_output" value="false"/> | |
784 <param name="remove_statified_output" value="false"/> | |
785 <param name="intermediate_temp" value=""/> | |
786 </section> | |
787 <output name="gene_families_tsv" ftype="tabular"> | |
788 <assert_contents> | |
789 <has_text text="UniRef90_R6HHA8|g__Bacteroides.s__Bacteroides_dorei"/> | |
790 <has_text text="UniRef90_unknown|g__Bacteroides.s__Bacteroides_vulgatus"/> | |
791 <has_n_columns n="2"/> | |
792 </assert_contents> | |
793 </output> | |
794 <output name="pathcoverage_tsv" ftype="tabular"> | |
795 <assert_contents> | |
796 <has_text text="UNMAPPED"/> | |
797 <has_text text="UNINTEGRATED"/> | |
798 <has_n_columns n="2"/> | |
799 </assert_contents> | |
800 </output> | |
801 <output name="pathabundance_tsv" ftype="tabular"> | |
802 <assert_contents> | |
803 <has_text text="UNMAPPED"/> | |
804 <has_text text="UNINTEGRATED"/> | |
805 <has_n_columns n="2"/> | |
806 </assert_contents> | |
807 </output> | |
808 <output name="log" ftype="txt"> | |
809 <assert_contents> | |
810 <has_text text="Process the sam mapping results"/> | |
811 <has_text text="Computing gene families"/> | |
812 <has_text text="Computing pathways abundance and coverage"/> | |
813 </assert_contents> | |
814 </output> | |
815 </test> | |
816 <test expect_num_outputs="4"> | |
817 <conditional name="in"> | |
818 <!-- raw reads: FASTQ input (demo.fastq.gz) --> | |
819 <param name="selector" value="raw"/> | |
820 <param name="input" value="demo.fastq.gz"/> | |
821 </conditional> | |
822 <conditional name="wf"> | |
823 <!-- workflow variant bypass_nucleotide_search: only the translated search section is configured (cached DEMO protein database) --> | |
824 <param name="selector" value="bypass_nucleotide_search"/> | |
825 <section name="translated_search"> | |
826 <conditional name="protein_db"> | |
827 <param name="selector" value="cached"/> | |
828 <param name="protein_database" value="uniref-DEMO_diamond-20210421"/> | |
829 </conditional> | |
830 <param name="evalue" value="1"/> | |
831 <param name="translated_subject_coverage_threshold" value="50"/> | |
832 <param name="translated_query_coverage_threshold" value="90"/> | |
833 </section> | |
834 </conditional> | |
835 <section name="g_p_quant"> | |
836 <param name="gap_fill" value="true"/> | |
837 <param name="minpath" value="true"/> | |
838 <param name="pathways" value="metacyc"/> | |
839 <param name="xipe" value="false"/> | |
840 <param name="annotation_gene_index" value="3"/> | |
841 </section> | |
842 <section name="out"> | |
843 <param name="output_basename" value="humann"/> | |
844 <param name="log_level" value="DEBUG"/> | |
845 <param name="output_format" value="tsv"/> | |
846 <param name="output_max_decimals" value="10"/> | |
847 <param name="remove_column_description_output" value="false"/> | |
848 <param name="remove_statified_output" value="false"/> | |
849 <param name="intermediate_temp" value=""/> | |
850 </section> | |
851 <output name="gene_families_tsv" ftype="tabular"> | |
852 <assert_contents> | |
853 <has_text text="humann_Abundance-RPKs"/> | |
854 <has_text text="UniRef90_Q9ZUH4|unclassified"/> | |
855 <has_n_columns n="2"/> | |
856 </assert_contents> | |
857 </output> | |
858 <output name="pathcoverage_tsv" ftype="tabular"> | |
859 <assert_contents> | |
860 <has_text text="humann_Coverage"/> | |
861 <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/> | |
862 <has_n_columns n="2"/> | |
863 </assert_contents> | |
864 </output> | |
865 <output name="pathabundance_tsv" ftype="tabular"> | |
866 <assert_contents> | |
867 <has_text text="humann_Abundance"/> | |
868 <has_text text="PWY-5423: oleoresin monoterpene volatiles biosynthesis|unclassified"/> | |
869 <has_n_columns n="2"/> | |
870 </assert_contents> | |
871 </output> | |
872 <output name="log" ftype="txt"> | |
873 <assert_contents> | |
874 <has_text text="Total bugs after translated alignment: 1"/> | |
875 <has_text text="unclassified: "/> | |
876 <has_text text="Unaligned reads after translated alignment: "/> | |
877 <has_text text="Total gene families"/> | |
878 </assert_contents> | |
879 </output> | |
880 </test> | |
881 <test expect_num_outputs="4"> | |
882 <conditional name="in"> | |
883 <!-- raw reads: FASTQ input (demo.fastq.gz) --> | |
884 <param name="selector" value="raw"/> | |
885 <param name="input" value="demo.fastq.gz"/> | |
886 </conditional> | |
887 <conditional name="wf"> | |
888 <!-- workflow variant bypass_translated_search: prescreen and nucleotide search sections are configured with cached demo databases; no translated search section is set --> | |
889 <param name="selector" value="bypass_translated_search"/> | |
890 <section name="prescreen"> | |
891 <conditional name="metaphlan_db"> | |
892 <param name="selector" value="cached"/> | |
893 <param name="cached_db" value="metaphlan-demo-db-20210421"/> | |
894 </conditional> | |
895 <param name="prescreen_threshold" value="0.01"/> | |
896 </section> | |
897 <section name="nucleotide_search"> | |
898 <conditional name="nucleotide_db"> | |
899 <param name="selector" value="cached"/> | |
900 <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/> | |
901 </conditional> | |
902 <param name="nucleotide_identity_threshold" value="0"/> | |
903 <param name="nucleotide_subject_coverage_threshold" value="50"/> | |
904 <param name="nucleotide_query_coverage_threshold" value="90"/> | |
905 </section> | |
906 </conditional> | |
907 <section name="g_p_quant"> | |
908 <param name="gap_fill" value="true"/> | |
909 <param name="minpath" value="true"/> | |
910 <param name="pathways" value="metacyc"/> | |
911 <param name="xipe" value="false"/> | |
912 <param name="annotation_gene_index" value="3"/> | |
913 </section> | |
914 <section name="out"> | |
915 <param name="output_basename" value="newname"/> | |
916 <param name="log_level" value="DEBUG"/> | |
917 <param name="output_format" value="tsv"/> | |
918 <param name="output_max_decimals" value="10"/> | |
919 <param name="remove_column_description_output" value="false"/> | |
920 <param name="remove_statified_output" value="false"/> | |
921 <param name="intermediate_temp" value=""/> | |
922 </section> | |
923 <output name="gene_families_tsv" ftype="tabular"> | |
924 <assert_contents> | |
925 <has_text text="newname_Abundance-RPKs"/> | |
926 <has_text text="UniRef90_G1UL42|g__Bacteroides.s__Bacteroides_dorei"/> | |
927 <has_n_columns n="2"/> | |
928 </assert_contents> | |
929 </output> | |
930 <output name="pathcoverage_tsv" ftype="tabular"> | |
931 <assert_contents> | |
932 <has_text text="newname_Coverage"/> | |
933 <has_text text="UNMAPPED"/> | |
934 <has_text text="UNINTEGRATED"/> | |
935 <has_n_columns n="2"/> | |
936 </assert_contents> | |
937 </output> | |
938 <output name="pathabundance_tsv" ftype="tabular"> | |
939 <assert_contents> | |
940 <has_text text="newname_Abundance"/> | |
941 <has_text text="UNMAPPED"/> | |
942 <has_text text="UNINTEGRATED"/> | |
943 <has_n_columns n="2"/> | |
944 </assert_contents> | |
945 </output> | |
946 <output name="log" ftype="txt"> | |
947 <assert_contents> | |
948 <has_text text="Total bugs from nucleotide alignment: 2"/> | |
949 <has_text text="g__Bacteroides.s__Bacteroides_vulgatus: 1195 hits"/> | |
950 <has_text text="g__Bacteroides.s__Bacteroides_dorei: 1260 hits"/> | |
951 <has_text text="Total gene families from nucleotide alignment: 545"/> | |
952 <has_text text="Bypass translated search"/> | |
953 </assert_contents> | |
954 </output> | |
955 </test> | |
956 <!-- This test is expected to fail, as MetaPhlAn v4 is invoked on an outdated input DB. --> | |
957 <test expect_exit_code="1" expect_failure="true"> | |
958 <conditional name="in"> | |
959 <!-- raw reads: FASTQ input (demo.fastq.gz) --> | |
960 <param name="selector" value="raw"/> | |
961 <param name="input" value="demo.fastq.gz"/> | |
962 </conditional> | |
963 <conditional name="wf"> | |
964 <!-- workflow variant bypass_translated_search, with the prescreen cached_db set to the metaphlan-db-old-structure entry; the test declares expect_failure with exit code 1 and asserts no outputs --> | |
965 <param name="selector" value="bypass_translated_search"/> | |
966 <section name="prescreen"> | |
967 <conditional name="metaphlan_db"> | |
968 <param name="selector" value="cached"/> | |
969 <param name="cached_db" value="metaphlan-db-old-structure"/> | |
970 </conditional> | |
971 </section> | |
972 <section name="nucleotide_search"> | |
973 <conditional name="nucleotide_db"> | |
974 <param name="selector" value="cached"/> | |
975 <param name="nucleotide_database" value="chocophlan-DEMO-20210421"/> | |
976 </conditional> | |
977 </section> | |
978 </conditional> | |
979 </test> | |
980 </tests> | |
981 <help><![CDATA[ | |
982 @HELP_HEADER@ | |
983 | |
984 This tool corresponds to the main tool in the HUMAnN pipeline: | |
985 | |
986 | |
987 1. Taxonomic prescreen | |
988 | |
989 Reads are mapped (with MetaPhlAn) to clade-specific marker genes to rapidly identify community species | |
990 | |
991 2. Pangenome search (nucleotide search) | |
992 | |
993 Reads are mapped (with Bowtie2) to pangenomes of identified species | |
994 | |
995 3. Translated search | |
996 | |
997 Unclassified reads are aligned to a comprehensive and non-redundant protein database | |
998 | |
999 4. Gene family and pathway quantification | |
1000 | |
1001 - Gene abundance estimation | |
1002 | |
1003 Mapping results are processed to estimate per-species and community total gene family abundance, weighting by | |
1004 | |
1005 - alignment quality | |
1006 - gene length | |
1007 - gene coverage | |
1008 | |
1009 - Per-species and community-level metabolic network reconstruction | |
1010 | |
1011 Genes are mapped to metabolic reactions to identify a parsimonious set of pathways that explains each species' observed reactions | |
1012 | |
1013 Pathway abundance and coverage are quantified by: | |
1014 | |
1015 1. optimizing over alternative subpathways | |
1016 2. imputing abundance for conspicuously depleted reactions | |
1017 | |
1018 | |
1019 Inputs | |
1020 ====== | |
1021 | |
1022 HUMAnN can start from a few different types of input data each in a few different types of formats: | |
1023 | |
1024 - Quality-controlled shotgun sequencing reads | |
1025 | |
1026 This is the most common starting point: a metagenome (DNA reads) or metatranscriptome (RNA reads) | |
1027 | |
1028 - Pre-computed mappings of reads to database sequences | |
1029 | |
1030 - Pre-computed (typically gene) abundance tables | |
1031 | |
1032 | |
1033 HUMAnN uses 3 reference databases. | |
1034 Locally cached databases have to be downloaded before using them (using the dedicated tool). Custom databases can also be used after upload. | |
1035 | |
1036 Outputs | |
1037 ======= | |
1038 | |
1039 HUMAnN creates three output files: | |
1040 | |
1041 - Gene families and their abundance | |
1042 - Pathways and their abundance | |
1043 - Pathways and their coverage | |
1044 | |
1045 Ten intermediate temp output files can also be retrieved. | |
1046 | |
1047 ]]></help> | |
1048 <expand macro="citations"/> | |
1049 </tool> |